diff --git a/public/R/queueJobReport.R b/public/R/queueJobReport.R new file mode 100644 index 000000000..18d33054e --- /dev/null +++ b/public/R/queueJobReport.R @@ -0,0 +1,154 @@ +library(gsalib) +require("ggplot2") +require("gplots") + +# +# Standard command line switch. Can we loaded interactively for development +# or executed with RScript +# +args = commandArgs(TRUE) +onCMDLine = ! is.na(args[1]) +if ( onCMDLine ) { + inputFileName = args[1] + outputPDF = args[2] +} else { + inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt" + outputPDF = NA +} + +RUNTIME_UNITS = "(sec)" +ORIGINAL_UNITS_TO_SECONDS = 1/1000 + +# +# Helper function to aggregate all of the jobs in the report across all tables +# +allJobsFromReport <- function(report) { + names <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts") + sub <- lapply(report, function(table) table[,names]) + do.call("rbind", sub) +} + +# +# Creates segmentation plots of time (x) vs. job (y) with segments for the duration of the job +# +plotJobsGantt <- function(gatkReport, sortOverall) { + allJobs = allJobsFromReport(gatkReport) + if ( sortOverall ) { + title = "All jobs, by analysis, by start time" + allJobs = allJobs[order(allJobs$analysisName, allJobs$startTime, decreasing=T), ] + } else { + title = "All jobs, sorted by start time" + allJobs = allJobs[order(allJobs$startTime, decreasing=T), ] + } + allJobs$index = 1:nrow(allJobs) + minTime = min(allJobs$startTime) + allJobs$relStartTime = allJobs$startTime - minTime + allJobs$relDoneTime = allJobs$doneTime - minTime + allJobs$ganttName = paste(allJobs$jobName, "@", allJobs$exechosts) + maxRelTime = max(allJobs$relDoneTime) + p <- ggplot(data=allJobs, aes(x=relStartTime, y=index, color=analysisName)) + p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=2, arrow=arrow(length = unit(0.1, "cm"))) + p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2) + p <- p + xlim(0, maxRelTime * 1.1) + p <- p + xlab(paste("Start time (relative to first job)", RUNTIME_UNITS)) + p <- p + ylab("Job") + p <- p + opts(title=title) + print(p) +} + +# +# Plots scheduling efficiency at job events +# +plotProgressByTime <- function(gatkReport) { + allJobs = allJobsFromReport(gatkReport) + nJobs = dim(allJobs)[1] + allJobs = allJobs[order(allJobs$startTime, decreasing=F),] + allJobs$index = 1:nrow(allJobs) + + minTime = min(allJobs$startTime) + allJobs$relStartTime = allJobs$startTime - minTime + allJobs$relDoneTime = allJobs$doneTime - minTime + + times = sort(c(allJobs$relStartTime, allJobs$relDoneTime)) + + countJobs <- function(p) { + s = allJobs$relStartTime + e = allJobs$relDoneTime + x = c() # I wish I knew how to make this work with apply + for ( time in times ) + x = c(x, sum(p(s, e, time))) + x + } + + pending = countJobs(function(s, e, t) s > t) + done = countJobs(function(s, e, t) e < t) + running = nJobs - pending - done + + d = data.frame(times=times, pending=pending, running=running, done=done) + + p <- ggplot(data=melt(d, id.vars=c("times")), aes(x=times, y=value, color=variable)) + p <- p + facet_grid(variable ~ ., scales="free") + p <- p + geom_line(size=2) + p <- p + xlab(paste("Time since start of first job", RUNTIME_UNITS)) + p <- p + opts(title = "Job scheduling") + print(p) +} + +# +# Creates tables for each job in this group +# +standardColumns = c("jobName", "startTime", "formattedStartTime", "analysisName", "intermediate", "exechosts", "formattedDoneTime", "doneTime", "runtime") +plotGroup <- function(groupTable) { + name = unique(groupTable$analysisName)[1] + groupAnnotations = setdiff(names(groupTable), standardColumns) + sub = groupTable[,c("jobName", groupAnnotations, "runtime")] + sub = sub[order(sub$iteration, sub$jobName, decreasing=F), ] + + # create a table showing each job and all annotations + textplot(sub, show.rownames=F) + title(paste("Job summary for", name, "full itemization"), cex=3) + + # create the table for each combination of values in the group, listing iterations in the columns + sum = cast(melt(sub, id.vars=groupAnnotations, measure.vars=c("runtime")), ... ~ iteration, fun.aggregate=mean) + textplot(as.data.frame(sum), show.rownames=F) + title(paste("Job summary for", name, "itemizing each iteration"), cex=3) + + # as above, but averaging over all iterations + groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration") + sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd)) + textplot(as.data.frame(sum), show.rownames=F) + title(paste("Job summary for", name, "averaging over all iterations"), cex=3) +} + +# print out some useful basic information +print("Report") +print(paste("Project :", inputFileName)) + +convertUnits <- function(gatkReportData) { + convertGroup <- function(g) { + g$runtime = g$runtime * ORIGINAL_UNITS_TO_SECONDS + g + } + lapply(gatkReportData, convertGroup) +} + + +# read the table +gatkReportData <- gsa.read.gatkreport(inputFileName) +gatkReportData <- convertUnits(gatkReportData) +#print(summary(gatkReportData)) + +if ( ! is.na(outputPDF) ) { + pdf(outputPDF, height=8.5, width=11) +} + +plotJobsGantt(gatkReportData, T) +plotJobsGantt(gatkReportData, F) +plotProgressByTime(gatkReportData) +for ( group in gatkReportData ) { + plotGroup(group) +} + +if ( ! is.na(outputPDF) ) { + dev.off() +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java index 2b38afaf6..b616a0ebe 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java @@ -303,7 +303,10 @@ public class GenotypeAndValidateWalker extends RodWalker= 0) uac.MIN_BASE_QUALTY_SCORE = mbq; - if (deletions >= 0) uac.MAX_DELETION_FRACTION = deletions; + if (deletions >= 0) + uac.MAX_DELETION_FRACTION = deletions; + else + uac.MAX_DELETION_FRACTION = 1.0; if (emitConf >= 0) uac.STANDARD_CONFIDENCE_FOR_EMITTING = emitConf; if (callConf >= 0) uac.STANDARD_CONFIDENCE_FOR_CALLING = callConf; @@ -347,12 +350,14 @@ public class GenotypeAndValidateWalker extends RodWalker l) { + if (l.size() == 0) + return 0.0; + + double rms = 0.0; + for (Double i : l) + rms += i*i; + rms /= l.size(); + return Math.sqrt(rms); + } + public static double distanceSquared( final double[] x, final double[] y ) { double dist = 0.0; for(int iii = 0; iii < x.length; iii++) { diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index d899613d7..faa3cf34e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -5,15 +5,11 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; -import org.apache.poi.hssf.record.PageBreakRecord; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.sam.ReadUtils; -import java.io.PipedOutputStream; -import java.lang.reflect.Array; import java.util.Iterator; -import java.util.List; +import java.util.Stack; import java.util.Vector; /** @@ -256,13 +252,17 @@ public class ClippingOp { if (start == 0 && stop == read.getReadLength() -1) return new SAMRecord(read.getHeader()); - int newLength = read.getReadLength() - (stop - start + 1); + CigarShift cigarShift = hardClipCigar(read.getCigar(), start, stop); + + // the cigar may force a shift left or right (or both) in case we are left with insertions + // starting or ending the read after applying the hard clip on start/stop. + int newLength = read.getReadLength() - (stop - start + 1) - cigarShift.shiftFromStart - cigarShift.shiftFromEnd; byte [] newBases = new byte[newLength]; byte [] newQuals = new byte[newLength]; - int copyStart = (start == 0) ? stop + 1 : 0; + int copyStart = (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart; System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength); - System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength); Cigar newCigar = hardClipCigar(read.getCigar(), start, stop); + System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength); SAMRecord hardClippedRead; try { @@ -273,19 +273,20 @@ public class ClippingOp { hardClippedRead.setBaseQualities(newQuals); hardClippedRead.setReadBases(newBases); - hardClippedRead.setCigar(newCigar); + hardClippedRead.setCigar(cigarShift.cigar); if (start == 0) - hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), newCigar)); + hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar)); return hardClippedRead; } @Requires({"!cigar.isEmpty()"}) - private Cigar hardClipCigar (Cigar cigar, int start, int stop) { + private CigarShift hardClipCigar (Cigar cigar, int start, int stop) { Cigar newCigar = new Cigar(); int index = 0; int totalHardClipCount = stop - start + 1; + int alignmentShift = 0; // caused by hard clipping insertions or deletions // hard clip the beginning of the cigar string if (start == 0) { @@ -307,15 +308,19 @@ public class ClippingOp { // we're still clipping or just finished perfectly if (index + shift == stop + 1) { - newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP)); + alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength()); + newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP)); } // element goes beyond what we need to clip else if (index + shift > stop + 1) { int elementLengthAfterChopping = cigarElement.getLength() - (stop - index + 1); - newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP)); + alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop-index+1); + newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP)); newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator())); } index += shift; + alignmentShift += calculateHardClippingAlignmentShift(cigarElement, shift); + if (index <= stop && cigarElementIterator.hasNext()) cigarElement = cigarElementIterator.next(); } @@ -345,6 +350,8 @@ public class ClippingOp { // element goes beyond our clip starting position else { int elementLengthAfterChopping = start - index; + alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength() - (start - index)); + // if this last element is a HARD CLIP operator, just merge it with our hard clip operator to be added later if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) totalHardClipCount += elementLengthAfterChopping; @@ -356,9 +363,67 @@ public class ClippingOp { if (index < start && cigarElementIterator.hasNext()) cigarElement = cigarElementIterator.next(); } - newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP)); + + // check if we are hard clipping indels + while(cigarElementIterator.hasNext()) { + cigarElement = cigarElementIterator.next(); + alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength()); + } + newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP)); } - return newCigar; + return cleanHardClippedCigar(newCigar); + } + + /** + * Checks if a hard clipped cigar left a read starting or ending with insertions/deletions + * and cleans it up accordingly. + * + * @param cigar + * @return + */ + private CigarShift cleanHardClippedCigar(Cigar cigar) { + Cigar cleanCigar = new Cigar(); + int shiftFromStart = 0; + int shiftFromEnd = 0; + Stack cigarStack = new Stack(); + Stack inverseCigarStack = new Stack(); + + for (CigarElement cigarElement : cigar.getCigarElements()) + cigarStack.push(cigarElement); + + for (int i = 1; i <= 2; i++) { + int shift = 0; + boolean readHasStarted = false; + + while(!cigarStack.empty()) { + CigarElement cigarElement = cigarStack.pop(); + + if ( !readHasStarted && + cigarElement.getOperator() != CigarOperator.INSERTION && + cigarElement.getOperator() != CigarOperator.DELETION && + cigarElement.getOperator() != CigarOperator.HARD_CLIP) + readHasStarted = true; + else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.INSERTION) + shift += cigarElement.getLength(); + + if (readHasStarted || cigarElement.getOperator() == CigarOperator.HARD_CLIP) { + if (i==1) + inverseCigarStack.push(cigarElement); + else + cleanCigar.add(cigarElement); + } + } + // first pass (i=1) is from end to start of the cigar elements + if (i == 1) { + shiftFromEnd = shift; + cigarStack = inverseCigarStack; + } + // second pass (i=2) is from start to end with the end already cleaned + else { + shiftFromStart = shift; + } + } + return new CigarShift(cleanCigar, shiftFromStart, shiftFromEnd); } private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) { @@ -379,6 +444,34 @@ public class ClippingOp { else break; } + return shift; } -} + + private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) { + if (cigarElement.getOperator() == CigarOperator.INSERTION) { + int cigarElementLength = cigarElement.getLength(); + if (clippedLength >= cigarElementLength) + return -cigarElement.getLength(); + else + return -clippedLength; + } + + if (cigarElement.getOperator() == CigarOperator.DELETION) + return cigarElement.getLength(); + + return 0; + } + + private class CigarShift { + private Cigar cigar; + private int shiftFromStart; + private int shiftFromEnd; + + private CigarShift(Cigar cigar, int shiftFromStart, int shiftFromEnd) { + this.cigar = cigar; + this.shiftFromStart = shiftFromStart; + this.shiftFromEnd = shiftFromEnd; + } + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index 6248849f9..be045309a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -1,11 +1,9 @@ package org.broadinstitute.sting.utils.clipreads; -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarElement; +import com.google.java.contract.Requires; import net.sf.samtools.SAMRecord; -import org.broad.tribble.util.PositionalStream; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.jets3t.service.multi.ThreadedStorageService; import java.util.ArrayList; import java.util.List; @@ -50,13 +48,26 @@ public class ReadClipper { return read; } - public SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { + public SAMRecord hardClipByReferenceCoordinatesLeftTail(int refStop) { + return hardClipByReferenceCoordinates(-1, refStop); + } + + public SAMRecord hardClipByReferenceCoordinatesRightTail(int refStart) { + return hardClipByReferenceCoordinates(refStart, -1); + } + + private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart); int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop); - System.out.println("Clipping start/stop: " + start + "/" + stop); + if (start < 0 || stop > read.getReadLength() - 1) + throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); + + //System.out.println("Clipping start/stop: " + start + "/" + stop); this.addOp(new ClippingOp(start, stop)); - return clipRead(ClippingRepresentation.HARDCLIP_BASES); + SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES); + this.ops = null; + return clippedRead; } public SAMRecord hardClipByReadCoordinates(int start, int stop) { @@ -64,10 +75,12 @@ public class ReadClipper { return clipRead(ClippingRepresentation.HARDCLIP_BASES); } + @Requires("left <= right") public SAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { - this.read = hardClipByReferenceCoordinates(-1, left); - this.ops = null; // reset the operations - return hardClipByReferenceCoordinates(right, -1); + if (left == right) + return new SAMRecord(read.getHeader()); + this.read = hardClipByReferenceCoordinates(right, -1); + return hardClipByReferenceCoordinates(-1, left); } public SAMRecord hardClipLowQualEnds(byte lowQual) { diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java index c0a04c81f..2d8421507 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java @@ -116,10 +116,26 @@ public class VCFUtils { return fields; } - + /** Only displays a warning if a logger is provided and an identical warning hasn't been already issued */ + private static final class HeaderConflictWarner { + Logger logger; + Set alreadyIssued = new HashSet(); + + private HeaderConflictWarner(final Logger logger) { + this.logger = logger; + } + + public void warn(final VCFHeaderLine line, final String msg) { + if ( logger != null && ! alreadyIssued.contains(line.getKey()) ) { + alreadyIssued.add(line.getKey()); + logger.warn(msg); + } + } + } public static Set smartMergeHeaders(Collection headers, Logger logger) throws IllegalStateException { HashMap map = new HashMap(); // from KEY.NAME -> line + HeaderConflictWarner conflictWarner = new HeaderConflictWarner(logger); // todo -- needs to remove all version headers from sources and add its own VCF version line for ( VCFHeader source : headers ) { @@ -152,24 +168,24 @@ public class VCFUtils { // number, then this value should be 1. However, if the INFO field describes a pair // of numbers, then this value should be 2 and so on. If the number of possible // values varies, is unknown, or is unbounded, then this value should be '.'. - if ( logger != null ) logger.warn("Promoting header field Number to . due to number differences in header lines: " + line + " " + other); + conflictWarner.warn(line, "Promoting header field Number to . due to number differences in header lines: " + line + " " + other); compOther.setNumberToUnbounded(); } else if ( compLine.getType() == VCFHeaderLineType.Integer && compOther.getType() == VCFHeaderLineType.Float ) { // promote key to Float - if ( logger != null ) logger.warn("Promoting Integer to Float in header: " + compOther); + conflictWarner.warn(line, "Promoting Integer to Float in header: " + compOther); map.put(key, compOther); } else if ( compLine.getType() == VCFHeaderLineType.Float && compOther.getType() == VCFHeaderLineType.Integer ) { // promote key to Float - if ( logger != null ) logger.warn("Promoting Integer to Float in header: " + compOther); + conflictWarner.warn(line, "Promoting Integer to Float in header: " + compOther); } else { throw new IllegalStateException("Incompatible header types, collision between these two types: " + line + " " + other ); } } if ( ! compLine.getDescription().equals(compOther) ) - if ( logger != null ) logger.warn("Allowing unequal description fields through: keeping " + compOther + " excluding " + compLine); + conflictWarner.warn(line, "Allowing unequal description fields through: keeping " + compOther + " excluding " + compLine); } else { // we are not equal, but we're not anything special either - if ( logger != null ) logger.warn("Ignoring header line already in map: this header line = " + line + " already present header = " + other); + conflictWarner.warn(line, "Ignoring header line already in map: this header line = " + line + " already present header = " + other); } } else { map.put(key, line); diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 6be37cf52..62bbb0307 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -632,7 +632,7 @@ public class ReadUtils { public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) { int start = getRefCoordSoftUnclippedStart(read); - int stop = getRefCoordSoftUnclippedStop(read); + int stop = getRefCoordSoftUnclippedEnd(read); if ( !read.getReferenceName().equals(interval.getContig()) ) return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG; @@ -658,6 +658,7 @@ public class ReadUtils { return ReadAndIntervalOverlap.OVERLAP_RIGHT; } + @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"}) public static int getRefCoordSoftUnclippedStart(SAMRecord read) { int start = read.getUnclippedStart(); for (CigarElement cigarElement : read.getCigar().getCigarElements()) { @@ -669,51 +670,98 @@ public class ReadUtils { return start; } - public static int getRefCoordSoftUnclippedStop(SAMRecord read) { - int stop = read.getAlignmentEnd(); - List cigarElementList = read.getCigar().getCigarElements(); - CigarElement lastCigarElement = cigarElementList.get(cigarElementList.size()-1); - if (lastCigarElement.getOperator() == CigarOperator.SOFT_CLIP) - stop += lastCigarElement.getLength(); - return stop; + @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"}) + public static int getRefCoordSoftUnclippedEnd(SAMRecord read) { + int stop = read.getUnclippedStart(); + int shift = 0; + CigarOperator lastOperator = null; + for (CigarElement cigarElement : read.getCigar().getCigarElements()) { + stop += shift; + lastOperator = cigarElement.getOperator(); + if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP) + shift = cigarElement.getLength(); + else + shift = 0; + } + return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ; + } + + /** + * Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region before + * the alignment start of the read. + * + * @param read + * @param refCoord + * @return the corresponding read coordinate or -1 if it failed to find it (it has been hard clipped before) + */ + @Requires({"refCoord >= read.getUnclippedStart()", "refCoord < read.getAlignmentStart()"}) + private static int getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(SAMRecord read, int refCoord) { + if (getRefCoordSoftUnclippedStart(read) <= refCoord) + return refCoord - getRefCoordSoftUnclippedStart(read) + 1; + return -1; } + /** + * Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region after + * the alignment end of the read. + * + * @param read + * @param refCoord + * @return the corresponding read coordinate or -1 if it failed to find it (it has been hard clipped before) + */ + @Requires({"refCoord <= read.getUnclippedEnd()", "refCoord > read.getAlignmentEnd()"}) + private static int getReadCoordinateForReferenceCoordinateBeforeAlignmentEnd(SAMRecord read, int refCoord) { + if (getRefCoordSoftUnclippedEnd(read) >= refCoord) + return refCoord - getRefCoordSoftUnclippedStart(read) + 1; + return -1; + } - @Requires({"refCoord >= read.getAlignmentStart()", "refCoord <= read.getAlignmentEnd()"}) + + @Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"}) @Ensures({"result >= 0", "result < read.getReadLength()"}) public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) { int readBases = 0; int refBases = 0; - int goal = refCoord - read.getAlignmentStart(); // The goal is to move this many reference bases - boolean goalReached = refBases == goal; - Iterator cigarElementIterator = read.getCigar().getCigarElements().iterator(); - while (!goalReached && cigarElementIterator.hasNext()) { - CigarElement cigarElement = cigarElementIterator.next(); - int shift = 0; - //if (refBases == 0 && readBases == 0 && cigarElement.getOperator() == CigarOperator.HARD_CLIP) { - // goal -= cigarElement.getLength(); - //} - - if (cigarElement.getOperator().consumesReferenceBases()) { - if (refBases + cigarElement.getLength() < goal) { - shift = cigarElement.getLength(); - } - else { - shift = goal - refBases; - } - refBases += shift; - } - goalReached = refBases == goal; - - if (cigarElement.getOperator().consumesReadBases()) { - readBases += goalReached ? shift : cigarElement.getLength(); - } + if (refCoord < read.getAlignmentStart()) { + readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(read, refCoord); + if (readBases < 0) + throw new ReviewedStingException("Requested a coordinate in a hard clipped area of the read. No equivalent read coordinate."); } + else if (refCoord > read.getAlignmentEnd()) { + readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentEnd(read, refCoord); + if (readBases < 0) + throw new ReviewedStingException("Requested a coordinate in a hard clipped area of the read. No equivalent read coordinate."); + } + else { + int goal = refCoord - read.getAlignmentStart(); // The goal is to move this many reference bases + boolean goalReached = refBases == goal; - if (!goalReached) - throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?"); + Iterator cigarElementIterator = read.getCigar().getCigarElements().iterator(); + while (!goalReached && cigarElementIterator.hasNext()) { + CigarElement cigarElement = cigarElementIterator.next(); + int shift = 0; + + if (cigarElement.getOperator().consumesReferenceBases()) { + if (refBases + cigarElement.getLength() < goal) { + shift = cigarElement.getLength(); + } + else { + shift = goal - refBases; + } + refBases += shift; + } + goalReached = refBases == goal; + + if (cigarElement.getOperator().consumesReadBases()) { + readBases += goalReached ? shift : cigarElement.getLength(); + } + } + + if (!goalReached) + throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?"); + } return readBases; } diff --git a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java index 013a37a88..f04731214 100755 --- a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java @@ -40,6 +40,8 @@ import java.util.EnumSet; * Test suite for the parsing engine. */ public class ParsingEngineUnitTest extends BaseTest { + /** we absolutely cannot have this file existing, or we'll fail the UnitTest */ + private final static String NON_EXISTANT_FILENAME_VCF = "this_file_should_not_exist_on_disk_123456789.vcf"; private ParsingEngine parsingEngine; @BeforeMethod @@ -636,7 +638,7 @@ public class ParsingEngineUnitTest extends BaseTest { @Test public void basicRodBindingArgumentTest() { - final String[] commandLine = new String[] {"-V:vcf","foo.vcf"}; + final String[] commandLine = new String[] {"-V:vcf",NON_EXISTANT_FILENAME_VCF}; parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class ); parsingEngine.parse( commandLine ); @@ -646,7 +648,7 @@ public class ParsingEngineUnitTest extends BaseTest { parsingEngine.loadArgumentsIntoObject( argProvider ); Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly"); - Assert.assertEquals(argProvider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value"); Assert.assertEquals(argProvider.binding.getType(), Feature.class, "Type isn't set to its expected value"); Assert.assertEquals(argProvider.binding.isBound(), true, "Bound() isn't returning its expected value"); Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); @@ -659,7 +661,7 @@ public class ParsingEngineUnitTest extends BaseTest { @Test public void shortNameOnlyRodBindingArgumentTest() { - final String[] commandLine = new String[] {"-short:vcf","foo.vcf"}; + final String[] commandLine = new String[] {"-short:vcf",NON_EXISTANT_FILENAME_VCF}; parsingEngine.addArgumentSource( ShortNameOnlyRodBindingArgProvider.class ); parsingEngine.parse( commandLine ); @@ -669,7 +671,7 @@ public class ParsingEngineUnitTest extends BaseTest { parsingEngine.loadArgumentsIntoObject( argProvider ); Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly"); - Assert.assertEquals(argProvider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value"); Assert.assertEquals(argProvider.binding.getType(), Feature.class, "Type isn't set to its expected value"); Assert.assertEquals(argProvider.binding.isBound(), true, "Bound() isn't returning its expected value"); Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); @@ -711,7 +713,7 @@ public class ParsingEngineUnitTest extends BaseTest { @Test(expectedExceptions = UserException.class) public void rodBindingArgumentTestMissingType() { - final String[] commandLine = new String[] {"-V","foo.vcf"}; + final String[] commandLine = new String[] {"-V",NON_EXISTANT_FILENAME_VCF}; parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class ); parsingEngine.parse( commandLine ); @@ -723,7 +725,7 @@ public class ParsingEngineUnitTest extends BaseTest { @Test(expectedExceptions = UserException.class) public void rodBindingArgumentTestTooManyTags() { - final String[] commandLine = new String[] {"-V:x,y,z","foo.vcf"}; + final String[] commandLine = new String[] {"-V:x,y,z",NON_EXISTANT_FILENAME_VCF}; parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class ); parsingEngine.parse( commandLine ); @@ -740,7 +742,7 @@ public class ParsingEngineUnitTest extends BaseTest { @Test public void variantContextBindingArgumentTest() { - final String[] commandLine = new String[] {"-V:vcf","foo.vcf"}; + final String[] commandLine = new String[] {"-V:vcf",NON_EXISTANT_FILENAME_VCF}; parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class ); parsingEngine.parse( commandLine ); @@ -750,14 +752,14 @@ public class ParsingEngineUnitTest extends BaseTest { parsingEngine.loadArgumentsIntoObject( argProvider ); Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly"); - Assert.assertEquals(argProvider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value"); Assert.assertEquals(argProvider.binding.getType(), VariantContext.class, "Type isn't set to its expected value"); Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); } @Test public void variantContextBindingArgumentTestVCF3() { - final String[] commandLine = new String[] {"-V:vcf3","foo.vcf"}; + final String[] commandLine = new String[] {"-V:vcf3",NON_EXISTANT_FILENAME_VCF}; parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class ); parsingEngine.parse( commandLine ); @@ -767,7 +769,7 @@ public class ParsingEngineUnitTest extends BaseTest { parsingEngine.loadArgumentsIntoObject( argProvider ); Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly"); - Assert.assertEquals(argProvider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value"); Assert.assertEquals(argProvider.binding.getType(), VariantContext.class, "Type isn't set to its expected value"); Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); } @@ -779,7 +781,7 @@ public class ParsingEngineUnitTest extends BaseTest { @Test public void listRodBindingArgumentTest() { - final String[] commandLine = new String[] {"-V:vcf","foo.vcf"}; + final String[] commandLine = new String[] {"-V:vcf",NON_EXISTANT_FILENAME_VCF}; parsingEngine.addArgumentSource( ListRodBindingArgProvider.class ); parsingEngine.parse( commandLine ); @@ -791,14 +793,14 @@ public class ParsingEngineUnitTest extends BaseTest { Assert.assertEquals(argProvider.bindings.size(), 1, "Unexpected number of bindings"); RodBinding binding = argProvider.bindings.get(0); Assert.assertEquals(binding.getName(), "binding", "Name isn't set properly"); - Assert.assertEquals(binding.getSource(), "foo.vcf", "Source isn't set to its expected value"); + Assert.assertEquals(binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value"); Assert.assertEquals(binding.getType(), Feature.class, "Type isn't set to its expected value"); Assert.assertEquals(binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); } @Test public void listRodBindingArgumentTest2Args() { - final String[] commandLine = new String[] {"-V:vcf","foo.vcf", "-V:vcf", "bar.vcf"}; + final String[] commandLine = new String[] {"-V:vcf",NON_EXISTANT_FILENAME_VCF, "-V:vcf", "bar.vcf"}; parsingEngine.addArgumentSource( ListRodBindingArgProvider.class ); parsingEngine.parse( commandLine ); @@ -811,7 +813,7 @@ public class ParsingEngineUnitTest extends BaseTest { RodBinding binding = argProvider.bindings.get(0); Assert.assertEquals(binding.getName(), "binding", "Name isn't set properly"); - Assert.assertEquals(binding.getSource(), "foo.vcf", "Source isn't set to its expected value"); + Assert.assertEquals(binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value"); Assert.assertEquals(binding.getType(), Feature.class, "Type isn't set to its expected value"); Assert.assertEquals(binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); @@ -838,7 +840,7 @@ public class ParsingEngineUnitTest extends BaseTest { @Test public void listRodBindingArgumentTestExplicitlyNamed() { - final String[] commandLine = new String[] {"-V:foo,vcf","foo.vcf", "-V:foo,vcf", "bar.vcf"}; + final String[] commandLine = new String[] {"-V:foo,vcf",NON_EXISTANT_FILENAME_VCF, "-V:foo,vcf", "bar.vcf"}; parsingEngine.addArgumentSource( ListRodBindingArgProvider.class ); parsingEngine.parse( commandLine ); diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1959.jar b/settings/repository/edu.mit.broad/picard-private-parts-2034.jar similarity index 62% rename from settings/repository/edu.mit.broad/picard-private-parts-1959.jar rename to settings/repository/edu.mit.broad/picard-private-parts-2034.jar index ae11e636b..11f59420c 100644 Binary files a/settings/repository/edu.mit.broad/picard-private-parts-1959.jar and b/settings/repository/edu.mit.broad/picard-private-parts-2034.jar differ diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1959.xml b/settings/repository/edu.mit.broad/picard-private-parts-2034.xml similarity index 64% rename from settings/repository/edu.mit.broad/picard-private-parts-1959.xml rename to settings/repository/edu.mit.broad/picard-private-parts-2034.xml index e7c7e3a21..1a60a2015 100644 --- a/settings/repository/edu.mit.broad/picard-private-parts-1959.xml +++ b/settings/repository/edu.mit.broad/picard-private-parts-2034.xml @@ -1,3 +1,3 @@ - + diff --git a/settings/repository/net.sf/picard-1.49.895.xml b/settings/repository/net.sf/picard-1.49.895.xml deleted file mode 100644 index 52d4900c5..000000000 --- a/settings/repository/net.sf/picard-1.49.895.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/picard-1.49.895.jar b/settings/repository/net.sf/picard-1.52.944.jar similarity index 82% rename from settings/repository/net.sf/picard-1.49.895.jar rename to settings/repository/net.sf/picard-1.52.944.jar index 3ee1f2090..e147ebbba 100644 Binary files a/settings/repository/net.sf/picard-1.49.895.jar and b/settings/repository/net.sf/picard-1.52.944.jar differ diff --git a/settings/repository/net.sf/picard-1.52.944.xml b/settings/repository/net.sf/picard-1.52.944.xml new file mode 100644 index 000000000..61fac6644 --- /dev/null +++ b/settings/repository/net.sf/picard-1.52.944.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/net.sf/sam-1.49.895.xml b/settings/repository/net.sf/sam-1.49.895.xml deleted file mode 100644 index 0436ce881..000000000 --- a/settings/repository/net.sf/sam-1.49.895.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/sam-1.49.895.jar b/settings/repository/net.sf/sam-1.52.944.jar similarity index 76% rename from settings/repository/net.sf/sam-1.49.895.jar rename to settings/repository/net.sf/sam-1.52.944.jar index c55ab0b72..7037c7a96 100644 Binary files a/settings/repository/net.sf/sam-1.49.895.jar and b/settings/repository/net.sf/sam-1.52.944.jar differ diff --git a/settings/repository/net.sf/sam-1.52.944.xml b/settings/repository/net.sf/sam-1.52.944.xml new file mode 100644 index 000000000..2229395b2 --- /dev/null +++ b/settings/repository/net.sf/sam-1.52.944.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/org.broad/tribble-18.jar b/settings/repository/org.broad/tribble-21.jar similarity index 88% rename from settings/repository/org.broad/tribble-18.jar rename to settings/repository/org.broad/tribble-21.jar index 1ea101dd0..2cb96f93d 100644 Binary files a/settings/repository/org.broad/tribble-18.jar and b/settings/repository/org.broad/tribble-21.jar differ diff --git a/settings/repository/org.broad/tribble-18.xml b/settings/repository/org.broad/tribble-21.xml similarity index 51% rename from settings/repository/org.broad/tribble-18.xml rename to settings/repository/org.broad/tribble-21.xml index d2648ddad..cd93af1b0 100644 --- a/settings/repository/org.broad/tribble-18.xml +++ b/settings/repository/org.broad/tribble-21.xml @@ -1,3 +1,3 @@ - +