diff --git a/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java index 0c26d847d..752fdaa3b 100755 --- a/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java @@ -548,7 +548,7 @@ public abstract class AbstractGenomeAnalysisEngine { private SAMDataSource createReadsDataSource(GenomeLocParser genomeLocParser, IndexedFastaSequenceFile refReader) { DownsamplingMethod method = getDownsamplingMethod(); - if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.NONE ) + if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF) throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested."); return new SAMDataSource( @@ -562,7 +562,7 @@ public abstract class AbstractGenomeAnalysisEngine { filters, includeReadsWithDeletionAtLoci(), generateExtendedEvents(), - getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_INPUT ? argCollection.BAQMode : BAQ.CalculationMode.NONE, + getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_INPUT ? argCollection.BAQMode : BAQ.CalculationMode.OFF, getWalkerBAQQualityMode(), refReader); } diff --git a/java/src/org/broadinstitute/sting/gatk/ReadProperties.java b/java/src/org/broadinstitute/sting/gatk/ReadProperties.java index 63f6680d4..8b791a3a7 100755 --- a/java/src/org/broadinstitute/sting/gatk/ReadProperties.java +++ b/java/src/org/broadinstitute/sting/gatk/ReadProperties.java @@ -38,7 +38,7 @@ public class ReadProperties { private boolean includeReadsWithDeletionAtLoci = false; private boolean useOriginalBaseQualities = false; private boolean generateExtendedEvents = false; - private BAQ.CalculationMode cmode = BAQ.CalculationMode.NONE; + private BAQ.CalculationMode cmode = BAQ.CalculationMode.OFF; private BAQ.QualityMode qmode = BAQ.QualityMode.DONT_MODIFY; IndexedFastaSequenceFile refReader = null; // read for BAQ, if desired diff --git a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 433d49e40..a56d321da 100755 --- a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -31,7 +31,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.DownsamplingMethod; import org.broadinstitute.sting.utils.interval.IntervalSetRule; @@ -153,12 +152,11 @@ public class GATKArgumentCollection { @Element(required = false) @Argument(fullName = "baq", shortName="baq", doc="Type of BAQ calculation to apply in the engine", required = false) - public BAQ.CalculationMode BAQMode = BAQ.CalculationMode.NONE; + public BAQ.CalculationMode BAQMode = BAQ.CalculationMode.OFF; @Element(required = false) - @Hidden - @Argument(fullName = "baqGapOpenPenalty", shortName="baqGOP", doc="Gap open penalty. For testing purposes only", required = false) - public double BAQGOP = 1e-3; // todo remove me + @Argument(fullName = "baqGapOpenPenalty", shortName="baqGOP", doc="BAQ gap open penalty. Default value is 1e-4. 1e-3 is perhaps better for whole genome call sets", required = false) + public double BAQGOP = BAQ.DEFAULT_GOP; /** * Gets the default downsampling method, returned if the user didn't specify any downsampling @@ -348,8 +346,9 @@ public class GATKArgumentCollection { } if (BTIMergeRule != other.BTIMergeRule) return false; - if ( BAQMode != other.BAQMode) - return false; + + if ( BAQMode != other.BAQMode) return false; + if ( BAQGOP != other.BAQGOP ) return false; return true; } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java index fe5b45b8f..8ea34ad6a 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java @@ -168,7 +168,7 @@ public class SAMDataSource implements SimpleDataSource { supplementalFilters, includeReadsWithDeletionAtLoci, generateExtendedEvents, - BAQ.CalculationMode.NONE, BAQ.QualityMode.DONT_MODIFY, null // no BAQ + BAQ.CalculationMode.OFF, BAQ.QualityMode.DONT_MODIFY, null // no BAQ ); } @@ -592,7 +592,7 @@ public class SAMDataSource implements SimpleDataSource { if (!noValidationOfReadOrder && enableVerification) wrappedIterator = new VerifyingSamIterator(genomeLocParser,wrappedIterator); - if (cmode != BAQ.CalculationMode.NONE) + if (cmode != BAQ.CalculationMode.OFF) wrappedIterator = new BAQSamIterator(refReader, wrappedIterator, cmode, qmode); wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters)); diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java index 46f0a6972..5f00d0c15 100644 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java @@ -242,7 +242,7 @@ public class SAMFileWriterStub implements Stub, StingSAMFileWrite * @{inheritDoc} */ public void addAlignment( SAMRecord alignment ) { - if ( engine.getArguments().BAQMode != BAQ.CalculationMode.NONE && engine.getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_OUTPUT ) { + if ( engine.getArguments().BAQMode != BAQ.CalculationMode.OFF && engine.getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_OUTPUT ) { //System.out.printf("Writing BAQ at OUTPUT TIME%n"); baqHMM.baqRead(alignment, engine.getReferenceDataSource().getReference(), engine.getArguments().BAQMode, engine.getWalkerBAQQualityMode()); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index 8dc69eeaa..6264fa1c7 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -35,6 +35,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -53,6 +54,7 @@ import java.io.PrintStream; @Reference(window=@Window(start=-1,stop=50)) @Allows(value={DataSource.READS, DataSource.REFERENCE}) @By(DataSource.REFERENCE) +@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN) public class RealignerTargetCreator extends RodWalker { @Output protected PrintStream out; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/qc/ProfileRodSystem.java b/java/src/org/broadinstitute/sting/gatk/walkers/qc/ProfileRodSystem.java index fa75542d7..1fd673940 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/qc/ProfileRodSystem.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/qc/ProfileRodSystem.java @@ -28,18 +28,16 @@ import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.util.ParsingUtils; import org.broad.tribble.vcf.VCFConstants; import org.broad.tribble.vcf.VCFCodec; -import org.broad.tribble.readers.LineReader; import org.broad.tribble.readers.AsciiLineReader; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; -import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.SimpleTimer; import java.io.PrintStream; import java.io.File; @@ -60,6 +58,8 @@ public class ProfileRodSystem extends RodWalker { @Argument(fullName="maxRecords", shortName="M", doc="Max. number of records to process", required=false) int MAX_RECORDS = -1; + SimpleTimer timer = new SimpleTimer("myTimer"); + public void initialize() { File rodFile = getRodFile(); @@ -75,13 +75,13 @@ public class ProfileRodSystem extends RodWalker { out.printf("full.decode\t%d\t%.2f%n", i, readFile(rodFile, ReadMode.DECODE)); } - startTimer(); // start up timer for map itself + timer.start(); // start up timer for map itself } private enum ReadMode { BY_BYTE, BY_LINE, BY_PARTS, DECODE_LOC, DECODE }; private final double readFile(File f, ReadMode mode) { - startTimer(); + timer.start(); try { byte[] data = new byte[100000]; @@ -120,17 +120,7 @@ public class ProfileRodSystem extends RodWalker { throw new RuntimeException(e); } - return elapsedTime(); - } - - private long startTime = -1l; - private void startTimer() { - startTime = System.currentTimeMillis(); - } - - private double elapsedTime() { - final long curTime = System.currentTimeMillis(); - return (curTime - startTime) / 1000.0; + return timer.getElapsedTime(); } private File getRodFile() { @@ -162,6 +152,6 @@ public class ProfileRodSystem extends RodWalker { } public void onTraversalDone(Integer sum) { - out.printf("gatk.traversal\t%d\t%.2f%n", 0, elapsedTime()); + out.printf("gatk.traversal\t%d\t%.2f%n", 0, timer.getElapsedTime()); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidateBAQWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidateBAQWalker.java index 7cc8aba90..25a8eb44e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidateBAQWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidateBAQWalker.java @@ -8,9 +8,11 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.SimpleTimer; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Argument; @@ -32,10 +34,10 @@ public class ValidateBAQWalker extends ReadWalker { protected int maxReadLen = 1000; @Argument(doc="",required=false) - protected double bw = 1e-3; + protected int bw = 7; @Argument(doc="",required=false) - protected int mbq = 4; + protected boolean samtoolsMode = false; @Argument(doc="only operates on reads with this name",required=false) protected String readName = null; @@ -55,91 +57,142 @@ public class ValidateBAQWalker extends ReadWalker { @Argument(doc="x each read is processed", required=false) protected int magnification = 1; + @Argument(doc="Profile performance", required=false) + protected boolean profile = false; + int counter = 0; BAQ baqHMM = null; // matches current samtools parameters public void initialize() { - baqHMM = new BAQ(bw, 0.1, 7, (byte)mbq); + if ( samtoolsMode ) + baqHMM = new BAQ(1e-3, 0.1, bw, (byte)0, true); + else + baqHMM = new BAQ(); } + long goodReads = 0, badReads = 0; + public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) { - IndexedFastaSequenceFile refReader = this.getToolkit().getReferenceDataSource().getReference(); if ( (readName == null || readName.equals(read.getReadName())) && read.getReadLength() <= maxReadLen && (includeReadsWithoutBAQTag || BAQ.hasBAQTag(read) ) ) { if ( baqHMM.excludeReadFromBAQ(read) ) - return 1; - byte[] baqFromTag = BAQ.calcBAQFromTag(read, false, includeReadsWithoutBAQTag); - if (counter++ % 1000 == 0 || printEachRead) out.printf("Checking read %s (%d)%n", read.getReadName(), counter); - BAQ.BAQCalculationResult baq = null; - - for ( int i = 0; i < magnification; i++ ) - baq = baqHMM.calcBAQFromHMM(read, refReader); + return 0; - boolean fail = false; - boolean print = false; - int badi = 0; + if ( profile ) { + profileBAQ(ref, read); + } else { + validateBAQ(ref, read); + } - if ( BAQ.hasBAQTag(read) ) { - for ( badi = 0; badi < baqFromTag.length; badi++ ) { - if ( baqFromTag[badi] != baq.bq[badi] ) { - if (MathUtils.arrayMin(read.getBaseQualities()) == 0) { - print = true; - fail = strict; - out.printf(" different, but Q0 base detected%n"); - break; - } - else if (readHasSoftClip(read)) { - print = true; - fail = strict; - out.printf(" different, but soft clip detected%n"); - break; - } else if (readHasDeletion(read)) { - print = true; - fail = strict; - out.printf(" different, but deletion detected%n"); - break; - } else if ( baq.bq[badi] < baqHMM.getMinBaseQual() ) { - print = fail = true; - out.printf(" Base quality %d < min %d", baq.bq[badi], baqHMM.getMinBaseQual()); - break; - } else { - print = fail = true; - break; - } + return 1; + } + + return 0; + } + + SimpleTimer tagTimer = new SimpleTimer("from.tag"); + SimpleTimer baqReadTimer = new SimpleTimer("baq.read"); + SimpleTimer glocalTimer = new SimpleTimer("hmm.glocal"); + + private void profileBAQ(ReferenceContext ref, SAMRecord read) { + IndexedFastaSequenceFile refReader = this.getToolkit().getReferenceDataSource().getReference(); + BAQ.BAQCalculationResult baq = null; + + tagTimer.restart(); + for ( int i = 0; i < magnification; i++ ) { BAQ.calcBAQFromTag(read, false, includeReadsWithoutBAQTag); } + tagTimer.stop(); + + baqReadTimer.restart(); + for ( int i = 0; i < magnification; i++ ) { baqHMM.baqRead(read, refReader, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.DONT_MODIFY ); } + baqReadTimer.stop(); + + glocalTimer.restart(); + BAQ.MAG = magnification; + baqHMM.baqRead(read, refReader, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.DONT_MODIFY); + BAQ.MAG = 1; + glocalTimer.stop(); + } + + + private void validateBAQ(ReferenceContext ref, SAMRecord read) { + IndexedFastaSequenceFile refReader = this.getToolkit().getReferenceDataSource().getReference(); + byte[] baqFromTag = BAQ.calcBAQFromTag(read, false, includeReadsWithoutBAQTag); + if (counter++ % 1000 == 0 || printEachRead) out.printf("Checking read %s (%d)%n", read.getReadName(), counter); + + BAQ.BAQCalculationResult baq = baqHMM.calcBAQFromHMM(read, refReader); + + boolean fail = false; + boolean print = false; + int badi = 0; + + if ( BAQ.hasBAQTag(read) ) { + for ( badi = 0; badi < baqFromTag.length; badi++ ) { + if ( baqFromTag[badi] != baq.bq[badi] ) { + if ( cigarLength(read) != read.getReadLength() ) { + print = true; + fail = false; + out.printf(" different, but cigar length != read length%n"); + break; + } + if (MathUtils.arrayMin(read.getBaseQualities()) == 0) { + print = true; + fail = strict; + out.printf(" different, but Q0 base detected%n"); + break; + } + else if (readHasSoftClip(read) && ! samtoolsMode) { + print = true; + fail = strict; + out.printf(" different, but soft clip detected%n"); + break; + } else if (readHasDeletion(read) ) { // && ! samtoolsMode) { + print = true; + fail = strict; + out.printf(" different, but deletion detected%n"); + break; + } else if ( baq.bq[badi] < baqHMM.getMinBaseQual() ) { + print = fail = true; + out.printf(" Base quality %d < min %d", baq.bq[badi], baqHMM.getMinBaseQual()); + break; + } else { + print = fail = true; + break; } } } - - if ( fail || printEachRead || ( print && alsoPrintWarnings ) ) { - byte[] pos = new byte[baq.bq.length]; - for ( int i = 0; i < pos.length; i++ ) pos[i] = (byte)i; - - out.printf(" read length : %d%n", read.getReadLength()); - out.printf(" read start : %d%n", read.getAlignmentStart()); - out.printf(" cigar : %s%n", read.getCigarString()); - out.printf(" ref bases : %s%n", new String(baq.refBases)); - out.printf(" read bases : %s%n", new String(read.getReadBases())); - out.printf(" ref length : %d%n", baq.refBases.length); - out.printf(" BQ tag : %s%n", read.getStringAttribute(BAQ.BAQ_TAG)); - if ( BAQ.hasBAQTag(read) ) printQuals(" BQ deltas : ", getBAQDeltas(read), true); - printQuals(" original quals: ", read.getBaseQualities(), true); - printQuals(" baq quals: ", baq.bq, true); - printQuals(" positions : ", pos, true); - printQuals(" original quals: ", read.getBaseQualities()); - if ( BAQ.hasBAQTag(read) ) printQuals(" tag quals: ", baqFromTag); - printQuals(" hmm quals: ", baq.bq); - out.printf(" read bases : %s%n", new String(read.getReadBases())); - out.println(Utils.dupString('-', 80)); - } - - - if ( fail ) - throw new StingException(String.format("BAQ from read and from HMM differ in read %s at position %d: tag qual = %d, hmm qual = %d", - read.getReadName(), badi, baqFromTag[badi], baq.bq[badi])); + if ( fail || print ) + badReads++; + else + goodReads++; } - return 1; + if ( fail || printEachRead || ( print && alsoPrintWarnings ) ) { + byte[] pos = new byte[baq.bq.length]; + for ( int i = 0; i < pos.length; i++ ) pos[i] = (byte)i; + + out.printf(" read length : %d%n", read.getReadLength()); + out.printf(" read start : %d (%d unclipped)%n", read.getAlignmentStart(), read.getUnclippedStart()); + out.printf(" cigar : %s%n", read.getCigarString()); + out.printf(" ref bases : %s%n", new String(baq.refBases)); + out.printf(" read bases : %s%n", new String(read.getReadBases())); + out.printf(" ref length : %d%n", baq.refBases.length); + out.printf(" BQ tag : %s%n", read.getStringAttribute(BAQ.BAQ_TAG)); + if ( BAQ.hasBAQTag(read) ) printQuals(" BQ deltas : ", getBAQDeltas(read), true); + printQuals(" original quals: ", read.getBaseQualities(), true); + printQuals(" baq quals: ", baq.bq, true); + printQuals(" positions : ", pos, true); + printQuals(" original quals: ", read.getBaseQualities()); + if ( BAQ.hasBAQTag(read) ) printQuals(" tag quals: ", baqFromTag); + printQuals(" hmm quals: ", baq.bq); + out.printf(" read bases : %s%n", new String(read.getReadBases())); + out.println(Utils.dupString('-', 80)); + } + + + if ( fail ) + throw new StingException(String.format("BAQ from read and from HMM differ in read %s at position %d: tag qual = %d, hmm qual = %d", + read.getReadName(), badi, baqFromTag[badi], baq.bq[badi])); } private final static boolean readHasSoftClip(SAMRecord read) { @@ -196,10 +249,51 @@ public class ValidateBAQWalker extends ReadWalker { return null; } + private int cigarLength(SAMRecord read) { + int readI = 0; + for ( CigarElement elt : read.getCigar().getCigarElements() ) { + int l = elt.getLength(); + switch (elt.getOperator()) { + case N: // cannot handle these + return 0; + case H : case P : // ignore pads and hard clips + break; + case S : + case I : + readI += l; + break; + case D : break; + case M : + readI += l; + break; + default: + throw new ReviewedStingException("BUG: Unexpected CIGAR element " + elt + " in read " + read.getReadName()); + } + } + return readI; + } + public Integer reduceInit() { return 0; } public Integer reduce(Integer value, Integer sum) { return value + sum; } + + public void onTraversalDone(Integer nreads) { + if ( profile ) { + out.printf("n.reads baq.per.read calculation time.in.secs%n"); + printTimer(nreads, tagTimer); + printTimer(nreads, glocalTimer); + printTimer(nreads, baqReadTimer); + } else { + out.printf("total reads BAQ'd %d; concordant BAQ reads %d %.4f; discordant BAQ reads %d %.4f%n", nreads, + goodReads, (100.0 * goodReads) / nreads, + badReads, (100.0 * badReads) / nreads); + } + } + + private final void printTimer(int nreads, SimpleTimer timer) { + out.printf("%d %d %s %.2f%n", nreads, magnification, timer.getName(), timer.getElapsedTime()); + } } diff --git a/java/src/org/broadinstitute/sting/utils/SimpleTimer.java b/java/src/org/broadinstitute/sting/utils/SimpleTimer.java new file mode 100644 index 000000000..a683eb140 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/SimpleTimer.java @@ -0,0 +1,59 @@ +package org.broadinstitute.sting.utils; + +import java.io.PrintStream; + +/** + * A useful simple system for timing code. + * + * User: depristo + * Date: Dec 10, 2010 + * Time: 9:07:44 AM + */ +public class SimpleTimer { + private String name = ""; + private long elapsed = 0l; + private long startTime = -1l; + boolean running = false; + + public SimpleTimer(String name) { + this.name = name; + } + + public String getName() { + return name; + } + + public SimpleTimer start() { + elapsed = 0l; + restart(); + return this; + } + + public SimpleTimer restart() { + running = true; + startTime = currentTime(); + return this; + } + + public long currentTime() { + return System.currentTimeMillis(); + } + + public SimpleTimer stop() { + running = false; + elapsed += currentTime() - startTime; + return this; + } + + public double getElapsedTime() { + if ( running ) + return (currentTime() - startTime) / 1000.0; + else + return elapsed; + } + + + public void printElapsedTime(PrintStream out) { + out.printf("SimpleTimer %s: %.2f%n", name, getElapsedTime()); + } +} diff --git a/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/java/src/org/broadinstitute/sting/utils/baq/BAQ.java index 0ec9055a8..bf1de27dd 100644 --- a/java/src/org/broadinstitute/sting/utils/baq/BAQ.java +++ b/java/src/org/broadinstitute/sting/utils/baq/BAQ.java @@ -6,9 +6,6 @@ import net.sf.samtools.CigarOperator; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.ReferenceSequence; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.BaseUtils; - -import java.util.Arrays; /* The topology of the profile HMM: @@ -39,7 +36,7 @@ public class BAQ { private final static boolean DEBUG = false; public enum CalculationMode { - NONE, // don't apply a BAQ at all, the default + OFF, // don't apply a BAQ at all, the default CALCULATE_AS_NECESSARY, // do HMM BAQ calculation on the fly, as necessary, if there's no tag RECALCULATE // do HMM BAQ calculation on the fly, regardless of whether there's a tag present } @@ -66,11 +63,12 @@ public class BAQ { qual2prob[i] = Math.pow(10, -i/10.); } - public static double DEFAULT_GOP = 1e-3; // todo -- make me final private + public static double DEFAULT_GOP = 1e-4; public double cd = -1; // gap open probility [1e-3] private double ce = 0.1; // gap extension probability [0.1] private int cb = 7; // band width [7] + private boolean includeClippedBases = false; public byte getMinBaseQual() { return minBaseQual; @@ -109,8 +107,10 @@ public class BAQ { * @param b band width * @param minBaseQual All bases with Q < minBaseQual are up'd to this value */ - public BAQ(final double d, final double e, final int b, final byte minBaseQual) { - cd = d; ce = e; cb = b; this.minBaseQual = minBaseQual; + public BAQ(final double d, final double e, final int b, final byte minBaseQual, boolean includeClippedBases) { + cd = d; ce = e; cb = b; + this.minBaseQual = minBaseQual; + this.includeClippedBases = includeClippedBases; initializeCachedData(); } @@ -142,12 +142,6 @@ public class BAQ { return EPSILONS[ref][read][qualB]; } -// private double calcEpsilon( byte ref, byte read, byte qualB ) { -// double qual = qual2prob[qualB < minBaseQual ? minBaseQual : qualB]; -// return (ref > 3 || read > 3)? 1. : ref == read ? 1. - qual : qual * EM; -// } - - // #################################################################################################### // // NOTE -- THIS CODE IS SYNCHRONIZED WITH CODE IN THE SAMTOOLS REPOSITORY. CHANGES TO THIS CODE SHOULD BE @@ -169,12 +163,21 @@ public class BAQ { /*** initialization ***/ // change coordinates int l_ref = ref.length; - //int l_query = query.length; + // set band width int bw2, bw = l_ref > l_query? l_ref : l_query; - if (bw > cb) bw = cb; - if (bw < Math.abs(l_ref - l_query)) bw = Math.abs(l_ref - l_query); + if (cb < Math.abs(l_ref - l_query)) { + bw = Math.abs(l_ref - l_query) + 3; + //System.out.printf("SC cb=%d, bw=%d%n", cb, bw); + } + if (bw > cb) bw = cb; + if (bw < Math.abs(l_ref - l_query)) { + //int bwOld = bw; + bw = Math.abs(l_ref - l_query); + //System.out.printf("old bw is %d, new is %d%n", bwOld, bw); + } + //System.out.printf("c->bw = %d, bw = %d, l_ref = %d, l_query = %d\n", cb, bw, l_ref, l_query); bw2 = bw * 2 + 1; // allocate the forward and backward matrices f[][] and b[][] and the scaling array s[] @@ -185,12 +188,14 @@ public class BAQ { // initialize transition probabilities double sM, sI, bM, bI; sM = sI = 1. / (2 * l_query + 2); - bM = (1 - cd) / l_query; bI = cd / l_query; // (bM+bI)*l_query==1 + bM = (1 - cd) / l_ref; bI = cd / l_ref; // (bM+bI)*l_ref==1 + double[] m = new double[9]; m[0*3+0] = (1 - cd - cd) * (1 - sM); m[0*3+1] = m[0*3+2] = cd * (1 - sM); m[1*3+0] = (1 - ce) * (1 - sI); m[1*3+1] = ce * (1 - sI); m[1*3+2] = 0.; m[2*3+0] = 1 - ce; m[2*3+1] = 0.; m[2*3+2] = ce; + /*** forward ***/ // f[0] f[0][set_u(bw, 0, 0)] = s[0] = 1.; @@ -274,8 +279,7 @@ public class BAQ { for (k = _beg, y = 1./s[i]; k <= _end; ++k) bi[k] *= y; } - // TODO -- this appears to be a null operation overall. For debugging only? - double pb; + double pb; { // b[0] int beg = 1, end = l_ref < bw + 1? l_ref : bw + 1; double sum = 0.; @@ -288,6 +292,7 @@ public class BAQ { pb = b[0][set_u(bw, 0, 0)] = sum / s[0]; // if everything works as is expected, pb == 1.0 } + /*** MAP ***/ for (i = 1; i <= l_query; ++i) { double sum = 0., max = 0.; @@ -450,30 +455,34 @@ public class BAQ { public BAQCalculationResult calcBAQFromHMM(SAMRecord read, IndexedFastaSequenceFile refReader) { // start is alignment start - band width / 2 - size of first I element, if there is one. Stop is similar int offset = getBandWidth() / 2; - long start = Math.max(read.getAlignmentStart() - offset - getFirstInsertionOffset(read), 0); - long stop = read.getAlignmentEnd() + offset + getLastInsertionOffset(read); + long readStart = includeClippedBases ? read.getUnclippedStart() : read.getAlignmentStart(); + long start = Math.max(readStart - offset - getFirstInsertionOffset(read), 0); + long stop = (includeClippedBases ? read.getUnclippedEnd() : read.getAlignmentEnd()) + offset + getLastInsertionOffset(read); if ( stop > refReader.getSequenceDictionary().getSequence(read.getReferenceName()).getSequenceLength() ) { return null; } else { // now that we have the start and stop, get the reference sequence covering it ReferenceSequence refSeq = refReader.getSubsequenceAt(read.getReferenceName(), start, stop); - return calcBAQFromHMM(read, refSeq.getBases(), (int)(start - read.getAlignmentStart())); + return calcBAQFromHMM(read, refSeq.getBases(), (int)(start - readStart)); } } + public static int MAG = 1; // todo -- remove me for performance testing only public BAQCalculationResult calcBAQFromHMM(byte[] ref, byte[] query, byte[] quals, int queryStart, int queryEnd ) { // note -- assumes ref is offset from the *CLIPPED* start BAQCalculationResult baqResult = new BAQCalculationResult(query, quals, ref); int queryLen = queryEnd - queryStart; - hmm_glocal(baqResult.refBases, baqResult.readBases, queryStart, queryLen, baqResult.rawQuals, baqResult.state, baqResult.bq); + for ( int i = 0; i < MAG; i++ ) + hmm_glocal(baqResult.refBases, baqResult.readBases, queryStart, queryLen, baqResult.rawQuals, baqResult.state, baqResult.bq); return baqResult; } // we need to bad ref by at least the bandwidth / 2 on either side public BAQCalculationResult calcBAQFromHMM(SAMRecord read, byte[] ref, int refOffset) { - int queryStart = (int)(read.getAlignmentStart() - read.getUnclippedStart()); - int queryEnd = (int)(read.getReadLength() - (read.getUnclippedEnd() - read.getAlignmentEnd())); + // todo -- need to handle the case where the cigar sum of lengths doesn't cover the whole read + int queryStart = includeClippedBases ? 0 : read.getAlignmentStart() - read.getUnclippedStart(); + int queryEnd = read.getReadLength() - (includeClippedBases ? 0 : read.getUnclippedEnd() - read.getAlignmentEnd()); BAQCalculationResult baqResult = calcBAQFromHMM(ref, read.getReadBases(), read.getBaseQualities(), queryStart, queryEnd); // cap quals @@ -485,7 +494,8 @@ public class BAQ { return null; case H : case P : // ignore pads and hard clips break; - case I : case S : + case S : refI += l; // move the reference too, in addition to I + case I : // todo -- is it really the case that we want to treat I and S the same? for ( int i = readI; i < readI + l; i++ ) baqResult.bq[i] = baqResult.rawQuals[i]; readI += l; @@ -502,6 +512,8 @@ public class BAQ { throw new ReviewedStingException("BUG: Unexpected CIGAR element " + elt + " in read " + read.getReadName()); } } + if ( readI != read.getReadLength() ) // odd cigar string + System.arraycopy(baqResult.rawQuals, 0, baqResult.bq, 0, baqResult.bq.length); return baqResult; } @@ -532,7 +544,7 @@ public class BAQ { if ( DEBUG ) System.out.printf("BAQ %s read %s%n", calculationType, read.getReadName()); byte[] BAQQuals = read.getBaseQualities(); // in general we are overwriting quals, so just get a pointer to them - if ( calculationType == CalculationMode.NONE ) { // we don't want to do anything + if ( calculationType == CalculationMode.OFF) { // we don't want to do anything ; // just fall though } else if ( excludeReadFromBAQ(read) ) { ; // just fall through diff --git a/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java b/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java index c9e634e86..0353559fe 100644 --- a/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java +++ b/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java @@ -28,7 +28,7 @@ public class BAQSamIterator implements StingSAMIterator { * @param qmode */ public BAQSamIterator(IndexedFastaSequenceFile refReader, StingSAMIterator it, BAQ.CalculationMode cmode, BAQ.QualityMode qmode) { - if ( cmode == BAQ.CalculationMode.NONE ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with calculation mode NONE"); + if ( cmode == BAQ.CalculationMode.OFF) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with calculation mode OFF"); if ( qmode == BAQ.QualityMode.DONT_MODIFY ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with quailty mode DONT_MODIFY"); this.refReader = refReader; diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java index c39af0139..298a0995a 100644 --- a/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java @@ -148,7 +148,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest { new ArrayList(), false, false, - BAQ.CalculationMode.NONE, BAQ.QualityMode.DONT_MODIFY, null // no BAQ + BAQ.CalculationMode.OFF, BAQ.QualityMode.DONT_MODIFY, null // no BAQ ); } }