Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
e040ea5c0d
|
|
@ -0,0 +1,154 @@
|
|||
library(gsalib)
|
||||
require("ggplot2")
|
||||
require("gplots")
|
||||
|
||||
#
|
||||
# Standard command line switch. Can we loaded interactively for development
|
||||
# or executed with RScript
|
||||
#
|
||||
args = commandArgs(TRUE)
|
||||
onCMDLine = ! is.na(args[1])
|
||||
if ( onCMDLine ) {
|
||||
inputFileName = args[1]
|
||||
outputPDF = args[2]
|
||||
} else {
|
||||
inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt"
|
||||
outputPDF = NA
|
||||
}
|
||||
|
||||
RUNTIME_UNITS = "(sec)"
|
||||
ORIGINAL_UNITS_TO_SECONDS = 1/1000
|
||||
|
||||
#
|
||||
# Helper function to aggregate all of the jobs in the report across all tables
|
||||
#
|
||||
allJobsFromReport <- function(report) {
|
||||
names <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts")
|
||||
sub <- lapply(report, function(table) table[,names])
|
||||
do.call("rbind", sub)
|
||||
}
|
||||
|
||||
#
|
||||
# Creates segmentation plots of time (x) vs. job (y) with segments for the duration of the job
|
||||
#
|
||||
plotJobsGantt <- function(gatkReport, sortOverall) {
|
||||
allJobs = allJobsFromReport(gatkReport)
|
||||
if ( sortOverall ) {
|
||||
title = "All jobs, by analysis, by start time"
|
||||
allJobs = allJobs[order(allJobs$analysisName, allJobs$startTime, decreasing=T), ]
|
||||
} else {
|
||||
title = "All jobs, sorted by start time"
|
||||
allJobs = allJobs[order(allJobs$startTime, decreasing=T), ]
|
||||
}
|
||||
allJobs$index = 1:nrow(allJobs)
|
||||
minTime = min(allJobs$startTime)
|
||||
allJobs$relStartTime = allJobs$startTime - minTime
|
||||
allJobs$relDoneTime = allJobs$doneTime - minTime
|
||||
allJobs$ganttName = paste(allJobs$jobName, "@", allJobs$exechosts)
|
||||
maxRelTime = max(allJobs$relDoneTime)
|
||||
p <- ggplot(data=allJobs, aes(x=relStartTime, y=index, color=analysisName))
|
||||
p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=2, arrow=arrow(length = unit(0.1, "cm")))
|
||||
p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2)
|
||||
p <- p + xlim(0, maxRelTime * 1.1)
|
||||
p <- p + xlab(paste("Start time (relative to first job)", RUNTIME_UNITS))
|
||||
p <- p + ylab("Job")
|
||||
p <- p + opts(title=title)
|
||||
print(p)
|
||||
}
|
||||
|
||||
#
|
||||
# Plots scheduling efficiency at job events
|
||||
#
|
||||
plotProgressByTime <- function(gatkReport) {
|
||||
allJobs = allJobsFromReport(gatkReport)
|
||||
nJobs = dim(allJobs)[1]
|
||||
allJobs = allJobs[order(allJobs$startTime, decreasing=F),]
|
||||
allJobs$index = 1:nrow(allJobs)
|
||||
|
||||
minTime = min(allJobs$startTime)
|
||||
allJobs$relStartTime = allJobs$startTime - minTime
|
||||
allJobs$relDoneTime = allJobs$doneTime - minTime
|
||||
|
||||
times = sort(c(allJobs$relStartTime, allJobs$relDoneTime))
|
||||
|
||||
countJobs <- function(p) {
|
||||
s = allJobs$relStartTime
|
||||
e = allJobs$relDoneTime
|
||||
x = c() # I wish I knew how to make this work with apply
|
||||
for ( time in times )
|
||||
x = c(x, sum(p(s, e, time)))
|
||||
x
|
||||
}
|
||||
|
||||
pending = countJobs(function(s, e, t) s > t)
|
||||
done = countJobs(function(s, e, t) e < t)
|
||||
running = nJobs - pending - done
|
||||
|
||||
d = data.frame(times=times, pending=pending, running=running, done=done)
|
||||
|
||||
p <- ggplot(data=melt(d, id.vars=c("times")), aes(x=times, y=value, color=variable))
|
||||
p <- p + facet_grid(variable ~ ., scales="free")
|
||||
p <- p + geom_line(size=2)
|
||||
p <- p + xlab(paste("Time since start of first job", RUNTIME_UNITS))
|
||||
p <- p + opts(title = "Job scheduling")
|
||||
print(p)
|
||||
}
|
||||
|
||||
#
|
||||
# Creates tables for each job in this group
|
||||
#
|
||||
standardColumns = c("jobName", "startTime", "formattedStartTime", "analysisName", "intermediate", "exechosts", "formattedDoneTime", "doneTime", "runtime")
|
||||
plotGroup <- function(groupTable) {
|
||||
name = unique(groupTable$analysisName)[1]
|
||||
groupAnnotations = setdiff(names(groupTable), standardColumns)
|
||||
sub = groupTable[,c("jobName", groupAnnotations, "runtime")]
|
||||
sub = sub[order(sub$iteration, sub$jobName, decreasing=F), ]
|
||||
|
||||
# create a table showing each job and all annotations
|
||||
textplot(sub, show.rownames=F)
|
||||
title(paste("Job summary for", name, "full itemization"), cex=3)
|
||||
|
||||
# create the table for each combination of values in the group, listing iterations in the columns
|
||||
sum = cast(melt(sub, id.vars=groupAnnotations, measure.vars=c("runtime")), ... ~ iteration, fun.aggregate=mean)
|
||||
textplot(as.data.frame(sum), show.rownames=F)
|
||||
title(paste("Job summary for", name, "itemizing each iteration"), cex=3)
|
||||
|
||||
# as above, but averaging over all iterations
|
||||
groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration")
|
||||
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
|
||||
textplot(as.data.frame(sum), show.rownames=F)
|
||||
title(paste("Job summary for", name, "averaging over all iterations"), cex=3)
|
||||
}
|
||||
|
||||
# print out some useful basic information
|
||||
print("Report")
|
||||
print(paste("Project :", inputFileName))
|
||||
|
||||
convertUnits <- function(gatkReportData) {
|
||||
convertGroup <- function(g) {
|
||||
g$runtime = g$runtime * ORIGINAL_UNITS_TO_SECONDS
|
||||
g
|
||||
}
|
||||
lapply(gatkReportData, convertGroup)
|
||||
}
|
||||
|
||||
|
||||
# read the table
|
||||
gatkReportData <- gsa.read.gatkreport(inputFileName)
|
||||
gatkReportData <- convertUnits(gatkReportData)
|
||||
#print(summary(gatkReportData))
|
||||
|
||||
if ( ! is.na(outputPDF) ) {
|
||||
pdf(outputPDF, height=8.5, width=11)
|
||||
}
|
||||
|
||||
plotJobsGantt(gatkReportData, T)
|
||||
plotJobsGantt(gatkReportData, F)
|
||||
plotProgressByTime(gatkReportData)
|
||||
for ( group in gatkReportData ) {
|
||||
plotGroup(group)
|
||||
}
|
||||
|
||||
if ( ! is.na(outputPDF) ) {
|
||||
dev.off()
|
||||
}
|
||||
|
|
@ -303,7 +303,10 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
|||
uac.alleles = alleles;
|
||||
if (!bamIsTruth) uac.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
||||
if (mbq >= 0) uac.MIN_BASE_QUALTY_SCORE = mbq;
|
||||
if (deletions >= 0) uac.MAX_DELETION_FRACTION = deletions;
|
||||
if (deletions >= 0)
|
||||
uac.MAX_DELETION_FRACTION = deletions;
|
||||
else
|
||||
uac.MAX_DELETION_FRACTION = 1.0;
|
||||
if (emitConf >= 0) uac.STANDARD_CONFIDENCE_FOR_EMITTING = emitConf;
|
||||
if (callConf >= 0) uac.STANDARD_CONFIDENCE_FOR_CALLING = callConf;
|
||||
|
||||
|
|
@ -347,12 +350,14 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
|||
}
|
||||
|
||||
VariantCallContext call;
|
||||
if ( vcComp.isSNP() )
|
||||
if ( vcComp.isSNP() ) {
|
||||
call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context);
|
||||
else if ( vcComp.isIndel() ) {
|
||||
} else if ( vcComp.isIndel() ) {
|
||||
call = indelEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context);
|
||||
}
|
||||
else {
|
||||
} else if ( bamIsTruth ) {
|
||||
// assume it's a SNP if no variation is present; this is necessary so that we can test supposed monomorphic sites against the truth bam
|
||||
call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context);
|
||||
} else {
|
||||
logger.info("Not SNP or INDEL " + vcComp.getChr() + ":" + vcComp.getStart() + " " + vcComp.getAlleles());
|
||||
return counter;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@ package org.broadinstitute.sting.utils;
|
|||
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.lucene.messages.NLS;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
|
|
@ -409,6 +408,17 @@ public class MathUtils {
|
|||
return Math.sqrt(rms);
|
||||
}
|
||||
|
||||
public static double rms(Collection<Double> l) {
|
||||
if (l.size() == 0)
|
||||
return 0.0;
|
||||
|
||||
double rms = 0.0;
|
||||
for (Double i : l)
|
||||
rms += i*i;
|
||||
rms /= l.size();
|
||||
return Math.sqrt(rms);
|
||||
}
|
||||
|
||||
public static double distanceSquared( final double[] x, final double[] y ) {
|
||||
double dist = 0.0;
|
||||
for(int iii = 0; iii < x.length; iii++) {
|
||||
|
|
|
|||
|
|
@ -5,15 +5,11 @@ import net.sf.samtools.Cigar;
|
|||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.poi.hssf.record.PageBreakRecord;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.io.PipedOutputStream;
|
||||
import java.lang.reflect.Array;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Stack;
|
||||
import java.util.Vector;
|
||||
|
||||
/**
|
||||
|
|
@ -256,13 +252,17 @@ public class ClippingOp {
|
|||
if (start == 0 && stop == read.getReadLength() -1)
|
||||
return new SAMRecord(read.getHeader());
|
||||
|
||||
int newLength = read.getReadLength() - (stop - start + 1);
|
||||
CigarShift cigarShift = hardClipCigar(read.getCigar(), start, stop);
|
||||
|
||||
// the cigar may force a shift left or right (or both) in case we are left with insertions
|
||||
// starting or ending the read after applying the hard clip on start/stop.
|
||||
int newLength = read.getReadLength() - (stop - start + 1) - cigarShift.shiftFromStart - cigarShift.shiftFromEnd;
|
||||
byte [] newBases = new byte[newLength];
|
||||
byte [] newQuals = new byte[newLength];
|
||||
int copyStart = (start == 0) ? stop + 1 : 0;
|
||||
int copyStart = (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart;
|
||||
|
||||
System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
|
||||
System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength); Cigar newCigar = hardClipCigar(read.getCigar(), start, stop);
|
||||
System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength);
|
||||
|
||||
SAMRecord hardClippedRead;
|
||||
try {
|
||||
|
|
@ -273,19 +273,20 @@ public class ClippingOp {
|
|||
|
||||
hardClippedRead.setBaseQualities(newQuals);
|
||||
hardClippedRead.setReadBases(newBases);
|
||||
hardClippedRead.setCigar(newCigar);
|
||||
hardClippedRead.setCigar(cigarShift.cigar);
|
||||
if (start == 0)
|
||||
hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), newCigar));
|
||||
hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar));
|
||||
|
||||
return hardClippedRead;
|
||||
|
||||
}
|
||||
|
||||
@Requires({"!cigar.isEmpty()"})
|
||||
private Cigar hardClipCigar (Cigar cigar, int start, int stop) {
|
||||
private CigarShift hardClipCigar (Cigar cigar, int start, int stop) {
|
||||
Cigar newCigar = new Cigar();
|
||||
int index = 0;
|
||||
int totalHardClipCount = stop - start + 1;
|
||||
int alignmentShift = 0; // caused by hard clipping insertions or deletions
|
||||
|
||||
// hard clip the beginning of the cigar string
|
||||
if (start == 0) {
|
||||
|
|
@ -307,15 +308,19 @@ public class ClippingOp {
|
|||
|
||||
// we're still clipping or just finished perfectly
|
||||
if (index + shift == stop + 1) {
|
||||
newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP));
|
||||
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
|
||||
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
|
||||
}
|
||||
// element goes beyond what we need to clip
|
||||
else if (index + shift > stop + 1) {
|
||||
int elementLengthAfterChopping = cigarElement.getLength() - (stop - index + 1);
|
||||
newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP));
|
||||
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop-index+1);
|
||||
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
|
||||
newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
|
||||
}
|
||||
index += shift;
|
||||
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, shift);
|
||||
|
||||
if (index <= stop && cigarElementIterator.hasNext())
|
||||
cigarElement = cigarElementIterator.next();
|
||||
}
|
||||
|
|
@ -345,6 +350,8 @@ public class ClippingOp {
|
|||
// element goes beyond our clip starting position
|
||||
else {
|
||||
int elementLengthAfterChopping = start - index;
|
||||
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength() - (start - index));
|
||||
|
||||
// if this last element is a HARD CLIP operator, just merge it with our hard clip operator to be added later
|
||||
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
||||
totalHardClipCount += elementLengthAfterChopping;
|
||||
|
|
@ -356,9 +363,67 @@ public class ClippingOp {
|
|||
if (index < start && cigarElementIterator.hasNext())
|
||||
cigarElement = cigarElementIterator.next();
|
||||
}
|
||||
newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP));
|
||||
|
||||
// check if we are hard clipping indels
|
||||
while(cigarElementIterator.hasNext()) {
|
||||
cigarElement = cigarElementIterator.next();
|
||||
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
|
||||
}
|
||||
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
|
||||
}
|
||||
return newCigar;
|
||||
return cleanHardClippedCigar(newCigar);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a hard clipped cigar left a read starting or ending with insertions/deletions
|
||||
* and cleans it up accordingly.
|
||||
*
|
||||
* @param cigar
|
||||
* @return
|
||||
*/
|
||||
private CigarShift cleanHardClippedCigar(Cigar cigar) {
|
||||
Cigar cleanCigar = new Cigar();
|
||||
int shiftFromStart = 0;
|
||||
int shiftFromEnd = 0;
|
||||
Stack<CigarElement> cigarStack = new Stack<CigarElement>();
|
||||
Stack<CigarElement> inverseCigarStack = new Stack<CigarElement>();
|
||||
|
||||
for (CigarElement cigarElement : cigar.getCigarElements())
|
||||
cigarStack.push(cigarElement);
|
||||
|
||||
for (int i = 1; i <= 2; i++) {
|
||||
int shift = 0;
|
||||
boolean readHasStarted = false;
|
||||
|
||||
while(!cigarStack.empty()) {
|
||||
CigarElement cigarElement = cigarStack.pop();
|
||||
|
||||
if ( !readHasStarted &&
|
||||
cigarElement.getOperator() != CigarOperator.INSERTION &&
|
||||
cigarElement.getOperator() != CigarOperator.DELETION &&
|
||||
cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
||||
readHasStarted = true;
|
||||
else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.INSERTION)
|
||||
shift += cigarElement.getLength();
|
||||
|
||||
if (readHasStarted || cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
|
||||
if (i==1)
|
||||
inverseCigarStack.push(cigarElement);
|
||||
else
|
||||
cleanCigar.add(cigarElement);
|
||||
}
|
||||
}
|
||||
// first pass (i=1) is from end to start of the cigar elements
|
||||
if (i == 1) {
|
||||
shiftFromEnd = shift;
|
||||
cigarStack = inverseCigarStack;
|
||||
}
|
||||
// second pass (i=2) is from start to end with the end already cleaned
|
||||
else {
|
||||
shiftFromStart = shift;
|
||||
}
|
||||
}
|
||||
return new CigarShift(cleanCigar, shiftFromStart, shiftFromEnd);
|
||||
}
|
||||
|
||||
private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) {
|
||||
|
|
@ -379,6 +444,34 @@ public class ClippingOp {
|
|||
else
|
||||
break;
|
||||
}
|
||||
|
||||
return shift;
|
||||
}
|
||||
}
|
||||
|
||||
private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) {
|
||||
if (cigarElement.getOperator() == CigarOperator.INSERTION) {
|
||||
int cigarElementLength = cigarElement.getLength();
|
||||
if (clippedLength >= cigarElementLength)
|
||||
return -cigarElement.getLength();
|
||||
else
|
||||
return -clippedLength;
|
||||
}
|
||||
|
||||
if (cigarElement.getOperator() == CigarOperator.DELETION)
|
||||
return cigarElement.getLength();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
private class CigarShift {
|
||||
private Cigar cigar;
|
||||
private int shiftFromStart;
|
||||
private int shiftFromEnd;
|
||||
|
||||
private CigarShift(Cigar cigar, int shiftFromStart, int shiftFromEnd) {
|
||||
this.cigar = cigar;
|
||||
this.shiftFromStart = shiftFromStart;
|
||||
this.shiftFromEnd = shiftFromEnd;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,11 +1,9 @@
|
|||
package org.broadinstitute.sting.utils.clipreads;
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broad.tribble.util.PositionalStream;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.jets3t.service.multi.ThreadedStorageService;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
|
@ -50,13 +48,26 @@ public class ReadClipper {
|
|||
return read;
|
||||
}
|
||||
|
||||
public SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
||||
public SAMRecord hardClipByReferenceCoordinatesLeftTail(int refStop) {
|
||||
return hardClipByReferenceCoordinates(-1, refStop);
|
||||
}
|
||||
|
||||
public SAMRecord hardClipByReferenceCoordinatesRightTail(int refStart) {
|
||||
return hardClipByReferenceCoordinates(refStart, -1);
|
||||
}
|
||||
|
||||
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
||||
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
|
||||
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
|
||||
|
||||
System.out.println("Clipping start/stop: " + start + "/" + stop);
|
||||
if (start < 0 || stop > read.getReadLength() - 1)
|
||||
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
||||
|
||||
//System.out.println("Clipping start/stop: " + start + "/" + stop);
|
||||
this.addOp(new ClippingOp(start, stop));
|
||||
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
this.ops = null;
|
||||
return clippedRead;
|
||||
}
|
||||
|
||||
public SAMRecord hardClipByReadCoordinates(int start, int stop) {
|
||||
|
|
@ -64,10 +75,12 @@ public class ReadClipper {
|
|||
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
}
|
||||
|
||||
@Requires("left <= right")
|
||||
public SAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
|
||||
this.read = hardClipByReferenceCoordinates(-1, left);
|
||||
this.ops = null; // reset the operations
|
||||
return hardClipByReferenceCoordinates(right, -1);
|
||||
if (left == right)
|
||||
return new SAMRecord(read.getHeader());
|
||||
this.read = hardClipByReferenceCoordinates(right, -1);
|
||||
return hardClipByReferenceCoordinates(-1, left);
|
||||
}
|
||||
|
||||
public SAMRecord hardClipLowQualEnds(byte lowQual) {
|
||||
|
|
|
|||
|
|
@ -116,10 +116,26 @@ public class VCFUtils {
|
|||
return fields;
|
||||
}
|
||||
|
||||
|
||||
/** Only displays a warning if a logger is provided and an identical warning hasn't been already issued */
|
||||
private static final class HeaderConflictWarner {
|
||||
Logger logger;
|
||||
Set<String> alreadyIssued = new HashSet<String>();
|
||||
|
||||
private HeaderConflictWarner(final Logger logger) {
|
||||
this.logger = logger;
|
||||
}
|
||||
|
||||
public void warn(final VCFHeaderLine line, final String msg) {
|
||||
if ( logger != null && ! alreadyIssued.contains(line.getKey()) ) {
|
||||
alreadyIssued.add(line.getKey());
|
||||
logger.warn(msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static Set<VCFHeaderLine> smartMergeHeaders(Collection<VCFHeader> headers, Logger logger) throws IllegalStateException {
|
||||
HashMap<String, VCFHeaderLine> map = new HashMap<String, VCFHeaderLine>(); // from KEY.NAME -> line
|
||||
HeaderConflictWarner conflictWarner = new HeaderConflictWarner(logger);
|
||||
|
||||
// todo -- needs to remove all version headers from sources and add its own VCF version line
|
||||
for ( VCFHeader source : headers ) {
|
||||
|
|
@ -152,24 +168,24 @@ public class VCFUtils {
|
|||
// number, then this value should be 1. However, if the INFO field describes a pair
|
||||
// of numbers, then this value should be 2 and so on. If the number of possible
|
||||
// values varies, is unknown, or is unbounded, then this value should be '.'.
|
||||
if ( logger != null ) logger.warn("Promoting header field Number to . due to number differences in header lines: " + line + " " + other);
|
||||
conflictWarner.warn(line, "Promoting header field Number to . due to number differences in header lines: " + line + " " + other);
|
||||
compOther.setNumberToUnbounded();
|
||||
} else if ( compLine.getType() == VCFHeaderLineType.Integer && compOther.getType() == VCFHeaderLineType.Float ) {
|
||||
// promote key to Float
|
||||
if ( logger != null ) logger.warn("Promoting Integer to Float in header: " + compOther);
|
||||
conflictWarner.warn(line, "Promoting Integer to Float in header: " + compOther);
|
||||
map.put(key, compOther);
|
||||
} else if ( compLine.getType() == VCFHeaderLineType.Float && compOther.getType() == VCFHeaderLineType.Integer ) {
|
||||
// promote key to Float
|
||||
if ( logger != null ) logger.warn("Promoting Integer to Float in header: " + compOther);
|
||||
conflictWarner.warn(line, "Promoting Integer to Float in header: " + compOther);
|
||||
} else {
|
||||
throw new IllegalStateException("Incompatible header types, collision between these two types: " + line + " " + other );
|
||||
}
|
||||
}
|
||||
if ( ! compLine.getDescription().equals(compOther) )
|
||||
if ( logger != null ) logger.warn("Allowing unequal description fields through: keeping " + compOther + " excluding " + compLine);
|
||||
conflictWarner.warn(line, "Allowing unequal description fields through: keeping " + compOther + " excluding " + compLine);
|
||||
} else {
|
||||
// we are not equal, but we're not anything special either
|
||||
if ( logger != null ) logger.warn("Ignoring header line already in map: this header line = " + line + " already present header = " + other);
|
||||
conflictWarner.warn(line, "Ignoring header line already in map: this header line = " + line + " already present header = " + other);
|
||||
}
|
||||
} else {
|
||||
map.put(key, line);
|
||||
|
|
|
|||
|
|
@ -632,7 +632,7 @@ public class ReadUtils {
|
|||
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) {
|
||||
|
||||
int start = getRefCoordSoftUnclippedStart(read);
|
||||
int stop = getRefCoordSoftUnclippedStop(read);
|
||||
int stop = getRefCoordSoftUnclippedEnd(read);
|
||||
|
||||
if ( !read.getReferenceName().equals(interval.getContig()) )
|
||||
return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;
|
||||
|
|
@ -658,6 +658,7 @@ public class ReadUtils {
|
|||
return ReadAndIntervalOverlap.OVERLAP_RIGHT;
|
||||
}
|
||||
|
||||
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"})
|
||||
public static int getRefCoordSoftUnclippedStart(SAMRecord read) {
|
||||
int start = read.getUnclippedStart();
|
||||
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||
|
|
@ -669,51 +670,98 @@ public class ReadUtils {
|
|||
return start;
|
||||
}
|
||||
|
||||
public static int getRefCoordSoftUnclippedStop(SAMRecord read) {
|
||||
int stop = read.getAlignmentEnd();
|
||||
List<CigarElement> cigarElementList = read.getCigar().getCigarElements();
|
||||
CigarElement lastCigarElement = cigarElementList.get(cigarElementList.size()-1);
|
||||
if (lastCigarElement.getOperator() == CigarOperator.SOFT_CLIP)
|
||||
stop += lastCigarElement.getLength();
|
||||
return stop;
|
||||
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"})
|
||||
public static int getRefCoordSoftUnclippedEnd(SAMRecord read) {
|
||||
int stop = read.getUnclippedStart();
|
||||
int shift = 0;
|
||||
CigarOperator lastOperator = null;
|
||||
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||
stop += shift;
|
||||
lastOperator = cigarElement.getOperator();
|
||||
if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
||||
shift = cigarElement.getLength();
|
||||
else
|
||||
shift = 0;
|
||||
}
|
||||
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region before
|
||||
* the alignment start of the read.
|
||||
*
|
||||
* @param read
|
||||
* @param refCoord
|
||||
* @return the corresponding read coordinate or -1 if it failed to find it (it has been hard clipped before)
|
||||
*/
|
||||
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord < read.getAlignmentStart()"})
|
||||
private static int getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(SAMRecord read, int refCoord) {
|
||||
if (getRefCoordSoftUnclippedStart(read) <= refCoord)
|
||||
return refCoord - getRefCoordSoftUnclippedStart(read) + 1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region after
|
||||
* the alignment end of the read.
|
||||
*
|
||||
* @param read
|
||||
* @param refCoord
|
||||
* @return the corresponding read coordinate or -1 if it failed to find it (it has been hard clipped before)
|
||||
*/
|
||||
@Requires({"refCoord <= read.getUnclippedEnd()", "refCoord > read.getAlignmentEnd()"})
|
||||
private static int getReadCoordinateForReferenceCoordinateBeforeAlignmentEnd(SAMRecord read, int refCoord) {
|
||||
if (getRefCoordSoftUnclippedEnd(read) >= refCoord)
|
||||
return refCoord - getRefCoordSoftUnclippedStart(read) + 1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Requires({"refCoord >= read.getAlignmentStart()", "refCoord <= read.getAlignmentEnd()"})
|
||||
|
||||
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
|
||||
@Ensures({"result >= 0", "result < read.getReadLength()"})
|
||||
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
|
||||
int readBases = 0;
|
||||
int refBases = 0;
|
||||
int goal = refCoord - read.getAlignmentStart(); // The goal is to move this many reference bases
|
||||
boolean goalReached = refBases == goal;
|
||||
|
||||
Iterator<CigarElement> cigarElementIterator = read.getCigar().getCigarElements().iterator();
|
||||
while (!goalReached && cigarElementIterator.hasNext()) {
|
||||
CigarElement cigarElement = cigarElementIterator.next();
|
||||
int shift = 0;
|
||||
//if (refBases == 0 && readBases == 0 && cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
|
||||
// goal -= cigarElement.getLength();
|
||||
//}
|
||||
|
||||
if (cigarElement.getOperator().consumesReferenceBases()) {
|
||||
if (refBases + cigarElement.getLength() < goal) {
|
||||
shift = cigarElement.getLength();
|
||||
}
|
||||
else {
|
||||
shift = goal - refBases;
|
||||
}
|
||||
refBases += shift;
|
||||
}
|
||||
goalReached = refBases == goal;
|
||||
|
||||
if (cigarElement.getOperator().consumesReadBases()) {
|
||||
readBases += goalReached ? shift : cigarElement.getLength();
|
||||
}
|
||||
if (refCoord < read.getAlignmentStart()) {
|
||||
readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(read, refCoord);
|
||||
if (readBases < 0)
|
||||
throw new ReviewedStingException("Requested a coordinate in a hard clipped area of the read. No equivalent read coordinate.");
|
||||
}
|
||||
else if (refCoord > read.getAlignmentEnd()) {
|
||||
readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentEnd(read, refCoord);
|
||||
if (readBases < 0)
|
||||
throw new ReviewedStingException("Requested a coordinate in a hard clipped area of the read. No equivalent read coordinate.");
|
||||
}
|
||||
else {
|
||||
int goal = refCoord - read.getAlignmentStart(); // The goal is to move this many reference bases
|
||||
boolean goalReached = refBases == goal;
|
||||
|
||||
if (!goalReached)
|
||||
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
|
||||
Iterator<CigarElement> cigarElementIterator = read.getCigar().getCigarElements().iterator();
|
||||
while (!goalReached && cigarElementIterator.hasNext()) {
|
||||
CigarElement cigarElement = cigarElementIterator.next();
|
||||
int shift = 0;
|
||||
|
||||
if (cigarElement.getOperator().consumesReferenceBases()) {
|
||||
if (refBases + cigarElement.getLength() < goal) {
|
||||
shift = cigarElement.getLength();
|
||||
}
|
||||
else {
|
||||
shift = goal - refBases;
|
||||
}
|
||||
refBases += shift;
|
||||
}
|
||||
goalReached = refBases == goal;
|
||||
|
||||
if (cigarElement.getOperator().consumesReadBases()) {
|
||||
readBases += goalReached ? shift : cigarElement.getLength();
|
||||
}
|
||||
}
|
||||
|
||||
if (!goalReached)
|
||||
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
|
||||
}
|
||||
|
||||
return readBases;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,6 +40,8 @@ import java.util.EnumSet;
|
|||
* Test suite for the parsing engine.
|
||||
*/
|
||||
public class ParsingEngineUnitTest extends BaseTest {
|
||||
/** we absolutely cannot have this file existing, or we'll fail the UnitTest */
|
||||
private final static String NON_EXISTANT_FILENAME_VCF = "this_file_should_not_exist_on_disk_123456789.vcf";
|
||||
private ParsingEngine parsingEngine;
|
||||
|
||||
@BeforeMethod
|
||||
|
|
@ -636,7 +638,7 @@ public class ParsingEngineUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void basicRodBindingArgumentTest() {
|
||||
final String[] commandLine = new String[] {"-V:vcf","foo.vcf"};
|
||||
final String[] commandLine = new String[] {"-V:vcf",NON_EXISTANT_FILENAME_VCF};
|
||||
|
||||
parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class );
|
||||
parsingEngine.parse( commandLine );
|
||||
|
|
@ -646,7 +648,7 @@ public class ParsingEngineUnitTest extends BaseTest {
|
|||
parsingEngine.loadArgumentsIntoObject( argProvider );
|
||||
|
||||
Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly");
|
||||
Assert.assertEquals(argProvider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value");
|
||||
Assert.assertEquals(argProvider.binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value");
|
||||
Assert.assertEquals(argProvider.binding.getType(), Feature.class, "Type isn't set to its expected value");
|
||||
Assert.assertEquals(argProvider.binding.isBound(), true, "Bound() isn't returning its expected value");
|
||||
Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
|
||||
|
|
@ -659,7 +661,7 @@ public class ParsingEngineUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void shortNameOnlyRodBindingArgumentTest() {
|
||||
final String[] commandLine = new String[] {"-short:vcf","foo.vcf"};
|
||||
final String[] commandLine = new String[] {"-short:vcf",NON_EXISTANT_FILENAME_VCF};
|
||||
|
||||
parsingEngine.addArgumentSource( ShortNameOnlyRodBindingArgProvider.class );
|
||||
parsingEngine.parse( commandLine );
|
||||
|
|
@ -669,7 +671,7 @@ public class ParsingEngineUnitTest extends BaseTest {
|
|||
parsingEngine.loadArgumentsIntoObject( argProvider );
|
||||
|
||||
Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly");
|
||||
Assert.assertEquals(argProvider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value");
|
||||
Assert.assertEquals(argProvider.binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value");
|
||||
Assert.assertEquals(argProvider.binding.getType(), Feature.class, "Type isn't set to its expected value");
|
||||
Assert.assertEquals(argProvider.binding.isBound(), true, "Bound() isn't returning its expected value");
|
||||
Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
|
||||
|
|
@ -711,7 +713,7 @@ public class ParsingEngineUnitTest extends BaseTest {
|
|||
|
||||
@Test(expectedExceptions = UserException.class)
|
||||
public void rodBindingArgumentTestMissingType() {
|
||||
final String[] commandLine = new String[] {"-V","foo.vcf"};
|
||||
final String[] commandLine = new String[] {"-V",NON_EXISTANT_FILENAME_VCF};
|
||||
|
||||
parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class );
|
||||
parsingEngine.parse( commandLine );
|
||||
|
|
@ -723,7 +725,7 @@ public class ParsingEngineUnitTest extends BaseTest {
|
|||
|
||||
@Test(expectedExceptions = UserException.class)
|
||||
public void rodBindingArgumentTestTooManyTags() {
|
||||
final String[] commandLine = new String[] {"-V:x,y,z","foo.vcf"};
|
||||
final String[] commandLine = new String[] {"-V:x,y,z",NON_EXISTANT_FILENAME_VCF};
|
||||
|
||||
parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class );
|
||||
parsingEngine.parse( commandLine );
|
||||
|
|
@ -740,7 +742,7 @@ public class ParsingEngineUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void variantContextBindingArgumentTest() {
|
||||
final String[] commandLine = new String[] {"-V:vcf","foo.vcf"};
|
||||
final String[] commandLine = new String[] {"-V:vcf",NON_EXISTANT_FILENAME_VCF};
|
||||
|
||||
parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class );
|
||||
parsingEngine.parse( commandLine );
|
||||
|
|
@ -750,14 +752,14 @@ public class ParsingEngineUnitTest extends BaseTest {
|
|||
parsingEngine.loadArgumentsIntoObject( argProvider );
|
||||
|
||||
Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly");
|
||||
Assert.assertEquals(argProvider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value");
|
||||
Assert.assertEquals(argProvider.binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value");
|
||||
Assert.assertEquals(argProvider.binding.getType(), VariantContext.class, "Type isn't set to its expected value");
|
||||
Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void variantContextBindingArgumentTestVCF3() {
|
||||
final String[] commandLine = new String[] {"-V:vcf3","foo.vcf"};
|
||||
final String[] commandLine = new String[] {"-V:vcf3",NON_EXISTANT_FILENAME_VCF};
|
||||
|
||||
parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class );
|
||||
parsingEngine.parse( commandLine );
|
||||
|
|
@ -767,7 +769,7 @@ public class ParsingEngineUnitTest extends BaseTest {
|
|||
parsingEngine.loadArgumentsIntoObject( argProvider );
|
||||
|
||||
Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly");
|
||||
Assert.assertEquals(argProvider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value");
|
||||
Assert.assertEquals(argProvider.binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value");
|
||||
Assert.assertEquals(argProvider.binding.getType(), VariantContext.class, "Type isn't set to its expected value");
|
||||
Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
|
||||
}
|
||||
|
|
@ -779,7 +781,7 @@ public class ParsingEngineUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void listRodBindingArgumentTest() {
|
||||
final String[] commandLine = new String[] {"-V:vcf","foo.vcf"};
|
||||
final String[] commandLine = new String[] {"-V:vcf",NON_EXISTANT_FILENAME_VCF};
|
||||
|
||||
parsingEngine.addArgumentSource( ListRodBindingArgProvider.class );
|
||||
parsingEngine.parse( commandLine );
|
||||
|
|
@ -791,14 +793,14 @@ public class ParsingEngineUnitTest extends BaseTest {
|
|||
Assert.assertEquals(argProvider.bindings.size(), 1, "Unexpected number of bindings");
|
||||
RodBinding<Feature> binding = argProvider.bindings.get(0);
|
||||
Assert.assertEquals(binding.getName(), "binding", "Name isn't set properly");
|
||||
Assert.assertEquals(binding.getSource(), "foo.vcf", "Source isn't set to its expected value");
|
||||
Assert.assertEquals(binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value");
|
||||
Assert.assertEquals(binding.getType(), Feature.class, "Type isn't set to its expected value");
|
||||
Assert.assertEquals(binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void listRodBindingArgumentTest2Args() {
|
||||
final String[] commandLine = new String[] {"-V:vcf","foo.vcf", "-V:vcf", "bar.vcf"};
|
||||
final String[] commandLine = new String[] {"-V:vcf",NON_EXISTANT_FILENAME_VCF, "-V:vcf", "bar.vcf"};
|
||||
|
||||
parsingEngine.addArgumentSource( ListRodBindingArgProvider.class );
|
||||
parsingEngine.parse( commandLine );
|
||||
|
|
@ -811,7 +813,7 @@ public class ParsingEngineUnitTest extends BaseTest {
|
|||
|
||||
RodBinding<Feature> binding = argProvider.bindings.get(0);
|
||||
Assert.assertEquals(binding.getName(), "binding", "Name isn't set properly");
|
||||
Assert.assertEquals(binding.getSource(), "foo.vcf", "Source isn't set to its expected value");
|
||||
Assert.assertEquals(binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value");
|
||||
Assert.assertEquals(binding.getType(), Feature.class, "Type isn't set to its expected value");
|
||||
Assert.assertEquals(binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
|
||||
|
||||
|
|
@ -838,7 +840,7 @@ public class ParsingEngineUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void listRodBindingArgumentTestExplicitlyNamed() {
|
||||
final String[] commandLine = new String[] {"-V:foo,vcf","foo.vcf", "-V:foo,vcf", "bar.vcf"};
|
||||
final String[] commandLine = new String[] {"-V:foo,vcf",NON_EXISTANT_FILENAME_VCF, "-V:foo,vcf", "bar.vcf"};
|
||||
|
||||
parsingEngine.addArgumentSource( ListRodBindingArgProvider.class );
|
||||
parsingEngine.parse( commandLine );
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -1,3 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="edu.mit.broad" module="picard-private-parts" revision="1959" status="integration" publication="20110718185300" />
|
||||
<info organisation="edu.mit.broad" module="picard-private-parts" revision="2034" status="integration" publication="20110718185300" />
|
||||
</ivy-module>
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="net.sf" module="picard" revision="1.49.895" status="release" />
|
||||
</ivy-module>
|
||||
Binary file not shown.
|
|
@ -0,0 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="net.sf" module="picard" revision="1.52.944" status="release" />
|
||||
</ivy-module>
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="net.sf" module="sam" revision="1.49.895" status="release" />
|
||||
</ivy-module>
|
||||
Binary file not shown.
|
|
@ -0,0 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="net.sf" module="sam" revision="1.52.944" status="release" />
|
||||
</ivy-module>
|
||||
Binary file not shown.
|
|
@ -1,3 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="org.broad" module="tribble" revision="18" status="integration" />
|
||||
<info organisation="org.broad" module="tribble" revision="21" status="integration" />
|
||||
</ivy-module>
|
||||
Loading…
Reference in New Issue