diff --git a/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java index c0bc2c5a7..afb306b6d 100755 --- a/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java @@ -50,6 +50,7 @@ import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalUtils; @@ -544,6 +545,9 @@ public abstract class AbstractGenomeAnalysisEngine { private SAMDataSource createReadsDataSource(GenomeLocParser genomeLocParser, IndexedFastaSequenceFile refReader) { DownsamplingMethod method = getDownsamplingMethod(); + if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.NONE ) + throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested."); + return new SAMDataSource( unpackBAMFileList(argCollection.samFiles), genomeLocParser, @@ -555,9 +559,18 @@ public abstract class AbstractGenomeAnalysisEngine { filters, includeReadsWithDeletionAtLoci(), generateExtendedEvents(), - argCollection.BAQMode, refReader); + getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_INPUT ? argCollection.BAQMode : BAQ.CalculationMode.NONE, + getWalkerBAQQualityMode(), + refReader); } + /** + * Overloaded to break abstraction. Thanks Khalid :-( + * @return + */ + public BAQ.QualityMode getWalkerBAQQualityMode() { return BAQ.QualityMode.DONT_MODIFY; } + public BAQ.ApplicationTime getWalkerBAQApplicationTime() { return BAQ.ApplicationTime.ON_INPUT; } + /** * Opens a reference sequence file paired with an index. * diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 07eed89c7..2a006e7de 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -47,6 +47,7 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -181,6 +182,9 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine { return method; } + public BAQ.QualityMode getWalkerBAQQualityMode() { return WalkerManager.getBAQQualityMode(walker); } + public BAQ.ApplicationTime getWalkerBAQApplicationTime() { return WalkerManager.getBAQApplicationTime(walker); } + @Override protected boolean generateExtendedEvents() { return walker.generateExtendedEvents(); diff --git a/java/src/org/broadinstitute/sting/gatk/ReadProperties.java b/java/src/org/broadinstitute/sting/gatk/ReadProperties.java index b93b88152..63f6680d4 100755 --- a/java/src/org/broadinstitute/sting/gatk/ReadProperties.java +++ b/java/src/org/broadinstitute/sting/gatk/ReadProperties.java @@ -38,7 +38,8 @@ public class ReadProperties { private boolean includeReadsWithDeletionAtLoci = false; private boolean useOriginalBaseQualities = false; private boolean generateExtendedEvents = false; - private BAQ.Mode mode = BAQ.Mode.NONE; + private BAQ.CalculationMode cmode = BAQ.CalculationMode.NONE; + private BAQ.QualityMode qmode = BAQ.QualityMode.DONT_MODIFY; IndexedFastaSequenceFile refReader = null; // read for BAQ, if desired // do we want to generate additional piles of "extended" events (indels) @@ -127,9 +128,8 @@ public class ReadProperties { } - public BAQ.Mode getBAQMode() { - return mode; - } + public BAQ.QualityMode getBAQQualityMode() { return qmode; } + public BAQ.CalculationMode getBAQCalculationMode() { return cmode; } public IndexedFastaSequenceFile getRefReader() { return refReader; @@ -153,7 +153,8 @@ public class ReadProperties { * @param includeReadsWithDeletionAtLoci if 'true', the base pileups sent to the walker's map() method * will explicitly list reads with deletion over the current reference base; otherwise, only observed * bases will be seen in the pileups, and the deletions will be skipped silently. - * @param mode How should we apply the BAQ calculation to the reads? + * @param cmode How should we apply the BAQ calculation to the reads? + * @param qmode How should we apply the BAQ calculation to the reads? * @param refReader if applyBAQ is true, must be a valid pointer to a indexed fasta file reads so we can get the ref bases for BAQ calculation */ public ReadProperties( List samFiles, @@ -166,7 +167,8 @@ public class ReadProperties { Collection supplementalFilters, boolean includeReadsWithDeletionAtLoci, boolean generateExtendedEvents, - BAQ.Mode mode, + BAQ.CalculationMode cmode, + BAQ.QualityMode qmode, IndexedFastaSequenceFile refReader ) { this.readers = samFiles; this.header = header; @@ -178,7 +180,8 @@ public class ReadProperties { this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci; this.generateExtendedEvents = generateExtendedEvents; this.useOriginalBaseQualities = useOriginalBaseQualities; - this.mode = mode; + this.cmode = cmode; + this.qmode = qmode; this.refReader = refReader; } } diff --git a/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/java/src/org/broadinstitute/sting/gatk/WalkerManager.java index cb1da2b99..635da4a03 100755 --- a/java/src/org/broadinstitute/sting/gatk/WalkerManager.java +++ b/java/src/org/broadinstitute/sting/gatk/WalkerManager.java @@ -36,6 +36,7 @@ import org.broadinstitute.sting.utils.text.TextFormattingUtils; import org.broadinstitute.sting.utils.help.DescriptionTaglet; import org.broadinstitute.sting.utils.help.DisplayNameTaglet; import org.broadinstitute.sting.utils.help.SummaryTaglet; +import org.broadinstitute.sting.utils.baq.BAQ; import java.util.*; @@ -359,6 +360,14 @@ public class WalkerManager extends PluginManager { return downsamplingMethod; } + public static BAQ.QualityMode getBAQQualityMode(Walker walker) { + return walker.getClass().getAnnotation(BAQMode.class).QualityMode(); + } + + public static BAQ.ApplicationTime getBAQApplicationTime(Walker walker) { + return walker.getClass().getAnnotation(BAQMode.class).ApplicationTime(); + } + /** * Gets the type of downsampling method requested by the walker. If an alternative * downsampling method is specified on the command-line, the command-line version will diff --git a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 6ee58b862..2eef79661 100755 --- a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -152,7 +152,7 @@ public class GATKArgumentCollection { @Element(required = false) @Argument(fullName = "baq", shortName="baq", doc="Type of BAQ calculation to apply in the engine", required = false) - public BAQ.Mode BAQMode = BAQ.Mode.NONE; + public BAQ.CalculationMode BAQMode = BAQ.CalculationMode.NONE; /** * Gets the default downsampling method, returned if the user didn't specify any downsampling diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java index 9b428f23f..bea535bf7 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java @@ -168,7 +168,7 @@ public class SAMDataSource implements SimpleDataSource { supplementalFilters, includeReadsWithDeletionAtLoci, generateExtendedEvents, - BAQ.Mode.NONE, null // no BAQ + BAQ.CalculationMode.NONE, BAQ.QualityMode.DONT_MODIFY, null // no BAQ ); } @@ -199,7 +199,8 @@ public class SAMDataSource implements SimpleDataSource { Collection supplementalFilters, boolean includeReadsWithDeletionAtLoci, boolean generateExtendedEvents, - BAQ.Mode Mode, + BAQ.CalculationMode cmode, + BAQ.QualityMode qmode, IndexedFastaSequenceFile refReader ) { this.readMetrics = new ReadMetrics(); @@ -247,7 +248,7 @@ public class SAMDataSource implements SimpleDataSource { supplementalFilters, includeReadsWithDeletionAtLoci, generateExtendedEvents, - Mode, refReader ); + cmode, qmode, refReader ); // cache the read group id (original) -> read group id (merged) // and read group id (merged) -> read group id (original) mappings. @@ -517,7 +518,7 @@ public class SAMDataSource implements SimpleDataSource { readProperties.getDownsamplingMethod().toFraction, readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), readProperties.getSupplementalFilters(), - readProperties.getBAQMode(), readProperties.getRefReader()); + readProperties.getBAQCalculationMode(), readProperties.getBAQQualityMode(), readProperties.getRefReader()); } /** @@ -541,7 +542,7 @@ public class SAMDataSource implements SimpleDataSource { readProperties.getDownsamplingMethod().toFraction, readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), readProperties.getSupplementalFilters(), - readProperties.getBAQMode(), readProperties.getRefReader()); + readProperties.getBAQCalculationMode(), readProperties.getBAQQualityMode(), readProperties.getRefReader()); } /** @@ -575,7 +576,7 @@ public class SAMDataSource implements SimpleDataSource { Double downsamplingFraction, Boolean noValidationOfReadOrder, Collection supplementalFilters, - BAQ.Mode mode, IndexedFastaSequenceFile refReader ) { + BAQ.CalculationMode cmode, BAQ.QualityMode qmode, IndexedFastaSequenceFile refReader ) { wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities); // NOTE: this (and other filtering) should be done before on-the-fly sorting @@ -588,8 +589,8 @@ public class SAMDataSource implements SimpleDataSource { if (!noValidationOfReadOrder && enableVerification) wrappedIterator = new VerifyingSamIterator(genomeLocParser,wrappedIterator); - if (mode != BAQ.Mode.NONE) - wrappedIterator = new BAQSamIterator(refReader, wrappedIterator, mode); + if (cmode != BAQ.CalculationMode.NONE) + wrappedIterator = new BAQSamIterator(refReader, wrappedIterator, cmode, qmode); wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters)); diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java index ca73ac29d..d1e80a9ff 100644 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java @@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.baq.BAQ; /** * A stub for routing and management of SAM file reading and writing. @@ -100,6 +101,11 @@ public class SAMFileWriterStub implements Stub, StingSAMFileWrite */ private boolean writeStarted = false; + /** + * HMM for BAQ, if needed + */ + BAQ baqHMM = new BAQ(); + /** * Create a new stub given the requested SAM file and compression level. * @param engine source of header data, maybe other data about input files. @@ -236,6 +242,11 @@ public class SAMFileWriterStub implements Stub, StingSAMFileWrite * @{inheritDoc} */ public void addAlignment( SAMRecord alignment ) { + if ( engine.getArguments().BAQMode != BAQ.CalculationMode.NONE && engine.getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_OUTPUT ) { + System.out.printf("Writing BAQ at OUTPUT TIME%n"); + baqHMM.baqRead(alignment, engine.getReferenceDataSource().getReference(), engine.getArguments().BAQMode, engine.getWalkerBAQQualityMode()); + } + writeStarted = true; outputTracker.getStorage(this).addAlignment(alignment); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/BAQMode.java b/java/src/org/broadinstitute/sting/gatk/walkers/BAQMode.java new file mode 100644 index 000000000..99dd46cbe --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/BAQMode.java @@ -0,0 +1,34 @@ +package org.broadinstitute.sting.gatk.walkers; + +import java.lang.annotation.Documented; +import java.lang.annotation.Inherited; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; +import java.lang.annotation.ElementType; + +/** + * User: hanna + * Date: May 14, 2009 + * Time: 1:51:22 PM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * Allows the walker to indicate what type of data it wants to consume. + */ + +@Documented +@Inherited +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.TYPE) +public @interface BAQMode { + public abstract org.broadinstitute.sting.utils.baq.BAQ.QualityMode QualityMode() default org.broadinstitute.sting.utils.baq.BAQ.QualityMode.OVERWRITE_QUALS; + public abstract org.broadinstitute.sting.utils.baq.BAQ.ApplicationTime ApplicationTime() default org.broadinstitute.sting.utils.baq.BAQ.ApplicationTime.ON_INPUT; +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index f6c24a2bd..368b717a7 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.baq.BAQ; import java.io.PrintStream; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index 04b5b5e77..fc652ed8f 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -30,6 +30,7 @@ import java.util.List; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.filters.MalformedReadFilter; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.collections.Pair; import org.apache.log4j.Logger; @@ -41,6 +42,7 @@ import org.apache.log4j.Logger; * To change this template use File | Settings | File Templates. */ @ReadFilters(MalformedReadFilter.class) +@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT) public abstract class Walker { final protected static Logger logger = Logger.getLogger(Walker.class); private GenomeAnalysisEngine toolkit; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 568e26c92..c6f6d986a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.utils.vcf.VCFUtils; @@ -45,6 +46,9 @@ import java.io.PrintStream; * A variant caller which unifies the approaches of several disparate callers. Works for single-sample and * multi-sample data. The user can choose from several different incorporated calculation models. */ +// todo -- change when UG is generalized to do BAQ as necessary +//@BAQMode(QualityMode = BAQ.QualityMode.DONT_MODIFY, ApplicationTime = BAQ.ApplicationTime.HANDLED_IN_WALKER) +@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT) @Reference(window=@Window(start=-200,stop=200)) @By(DataSource.REFERENCE) @Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index c10c45373..820e43a4c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -41,10 +41,12 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.walkers.BAQMode; import org.broadinstitute.sting.gatk.filters.BadMateFilter; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator; import org.broadinstitute.sting.utils.interval.NwayIntervalMergingIterator; @@ -65,6 +67,7 @@ import java.util.*; * appropriate alternate reference (i.e. indel) exists and updates SAMRecords accordingly. */ //Reference(window=@Window(start=-30,stop=30)) +@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT) public class IndelRealigner extends ReadWalker { public static final String ORIGINAL_CIGAR_TAG = "OC"; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidateBAQWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidateBAQWalker.java index 458a0097e..d7cd34c47 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidateBAQWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidateBAQWalker.java @@ -20,6 +20,7 @@ import java.io.PrintStream; * Can also count the number of reads matching a given criterion using read filters (see the * --read-filter command line argument). Simplest example of a read-backed analysis. */ +@BAQMode(QualityMode = BAQ.QualityMode.DONT_MODIFY, ApplicationTime = BAQ.ApplicationTime.HANDLED_IN_WALKER) @Reference(window=@Window(start=-5,stop=5)) @Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES}) public class ValidateBAQWalker extends ReadWalker { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index 98ce9b7ef..21ef32e87 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.commandline.Argument; @@ -70,6 +71,7 @@ import java.util.Map; * @help.summary First pass of the recalibration. Generates recalibration table based on various user-specified covariates (such as reported quality score, cycle, and dinucleotide). */ +@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN) @By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file @ReadFilters( {ZeroMappingQualityReadFilter.class} ) // Filter out all reads with zero mapping quality @Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index 38cb18d20..522dad2b9 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -37,10 +37,7 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrde import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.sting.gatk.walkers.WalkerName; +import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.collections.NestedHashMap; @@ -49,6 +46,7 @@ import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.TextFormattingUtils; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -68,6 +66,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; * @help.summary Second pass of the recalibration. Uses the table generated by CountCovariates to update the base quality scores of the input bam file using a sequential table calculation making the base quality scores more accurately reflect the actual quality of the bases as mesaured by reference mismatch rate. */ +@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT) @WalkerName("TableRecalibration") @Requires({ DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES }) // This walker requires -I input.bam, it also requires -R reference.fasta public class TableRecalibrationWalker extends ReadWalker { diff --git a/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/java/src/org/broadinstitute/sting/utils/baq/BAQ.java index 86a9f7b08..d7d1fb1c6 100644 --- a/java/src/org/broadinstitute/sting/utils/baq/BAQ.java +++ b/java/src/org/broadinstitute/sting/utils/baq/BAQ.java @@ -36,12 +36,24 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; public class BAQ { private final static boolean DEBUG = false; - public enum Mode { + public enum CalculationMode { NONE, // don't apply a BAQ at all, the default - USE_TAG_ONLY, // only use the TAG, if present, in the reads - ADD_TAG_ONLY, // calculate the BAQ, but write it into the reads as the BAQ tag, leaving QUAL field alone CALCULATE_AS_NECESSARY, // do HMM BAQ calculation on the fly, as necessary, if there's no tag - RECALCULATE_ALWAYS // do HMM BAQ calculation on the fly, regardless of whether there's a tag present + RECALCULATE // do HMM BAQ calculation on the fly, regardless of whether there's a tag present + } + + /** these are features that only the walker can override */ + public enum QualityMode { + ADD_TAG, // calculate the BAQ, but write it into the reads as the BAQ tag, leaving QUAL field alone + OVERWRITE_QUALS, // overwrite the quality field directly + DONT_MODIFY // do the BAQ, but don't modify the quality scores themselves, just return them in the function. + } + + public enum ApplicationTime { + FORBIDDEN, // Walker does not tolerate BAQ input + ON_INPUT, // apply the BAQ calculation to the incoming reads, the default + ON_OUTPUT, // apply the BAQ calculation to outgoing read streams + HANDLED_IN_WALKER // the walker will deal with the BAQ calculation status itself } public static final String BAQ_TAG = "BQ"; @@ -487,35 +499,36 @@ public class BAQ { * @param read * @param refReader * @param calculationType - * @return + * @return BQ qualities for use, in case qmode is DONT_MODIFY */ - public SAMRecord baqRead(SAMRecord read, IndexedFastaSequenceFile refReader, Mode calculationType ) { + public byte[] baqRead(SAMRecord read, IndexedFastaSequenceFile refReader, CalculationMode calculationType, QualityMode qmode ) { if ( DEBUG ) System.out.printf("BAQ %s read %s%n", calculationType, read.getReadName()); - if ( calculationType == Mode.NONE ) { // we don't want to do anything + + byte[] BAQQuals = read.getBaseQualities(); // in general we are overwriting quals, so just get a pointer to them + if ( calculationType == CalculationMode.NONE ) { // we don't want to do anything ; // just fall though } else if ( excludeReadFromBAQ(read) ) { ; // just fall through - } else if ( calculationType == Mode.USE_TAG_ONLY ) { - calcBAQFromTag(read, true, true); } else { - if ( calculationType == Mode.RECALCULATE_ALWAYS || ! hasBAQTag(read) || calculationType == Mode.ADD_TAG_ONLY ) { + if ( calculationType == CalculationMode.RECALCULATE || ! hasBAQTag(read) ) { if ( DEBUG ) System.out.printf(" Calculating BAQ on the fly%n"); BAQCalculationResult hmmResult = calcBAQFromHMM(read, refReader); if ( hmmResult != null ) { - if ( calculationType == Mode.ADD_TAG_ONLY ) - addBAQTag(read, hmmResult.bq); - else - // modify QUALs directly - System.arraycopy(hmmResult.bq, 0, read.getBaseQualities(), 0, hmmResult.bq.length); + switch ( qmode ) { + case ADD_TAG: addBAQTag(read, hmmResult.bq); break; + case OVERWRITE_QUALS: System.arraycopy(hmmResult.bq, 0, read.getBaseQualities(), 0, hmmResult.bq.length); break; + case DONT_MODIFY: BAQQuals = hmmResult.bq; break; + default: throw new ReviewedStingException("BUG: unexpected qmode " + qmode); + } } - } else { + } else if ( qmode == QualityMode.OVERWRITE_QUALS ) { // only makes sense if we are overwriting quals if ( DEBUG ) System.out.printf(" Taking BAQ from tag%n"); // this overwrites the original qualities calcBAQFromTag(read, true, false); } } - return read; + return BAQQuals; } /** diff --git a/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java b/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java index 7fff240f2..b1db66e7e 100644 --- a/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java +++ b/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.baq; import net.sf.samtools.SAMRecord; import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import java.util.Iterator; @@ -14,7 +15,8 @@ public class BAQSamIterator implements StingSAMIterator { StingSAMIterator it; BAQ baqHMM = new BAQ(); // creates a BAQ creator with default parameters IndexedFastaSequenceFile refReader = null; - BAQ.Mode mode; + BAQ.CalculationMode cmode; + BAQ.QualityMode qmode; /** * Creates a new BAMSamIterator using the reference getter refReader and applies the BAM to the reads coming @@ -22,17 +24,28 @@ public class BAQSamIterator implements StingSAMIterator { * * @param refReader * @param it - * @param mode + * @param cmode + * @param qmode */ - public BAQSamIterator(IndexedFastaSequenceFile refReader, StingSAMIterator it, BAQ.Mode mode) { - this.refReader =refReader; + public BAQSamIterator(IndexedFastaSequenceFile refReader, StingSAMIterator it, BAQ.CalculationMode cmode, BAQ.QualityMode qmode) { + if ( cmode == BAQ.CalculationMode.NONE ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with calculation mode NONE"); + if ( qmode == BAQ.QualityMode.DONT_MODIFY ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with quailty mode DONT_MODIFY"); + + this.refReader = refReader; this.it = it; - this.mode = mode; + this.cmode = cmode; + this.qmode = qmode; } - public SAMRecord next() { return baqHMM.baqRead(it.next(), refReader, mode); } - public boolean hasNext() { return this.it.hasNext(); } - public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); } - public void close() { it.close(); } + public SAMRecord next() { + System.out.printf("BAQing during input%n"); + SAMRecord read = it.next(); + baqHMM.baqRead(read, refReader, cmode, qmode); + return read; + } + + public boolean hasNext() { return this.it.hasNext(); } + public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); } + public void close() { it.close(); } public Iterator iterator() { return this; } } diff --git a/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java b/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java index 1de11264f..bff06ede3 100644 --- a/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java +++ b/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java @@ -124,6 +124,7 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile { cacheMisses++; result = super.getSubsequenceAt(contig, start, stop); } else { + // todo -- potential optimization is to check if contig.name == contig, as this in generally will be true SAMSequenceRecord contigInfo = super.getSequenceDictionary().getSequence(contig); if (stop > contigInfo.getSequenceLength()) @@ -132,7 +133,6 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile { synchronized (lock) { // access to shared cache information must be protected if ( start < cacheStart || stop > cacheStop || cache == null || cache.getContigIndex() != contigInfo.getSequenceIndex() ) { cacheMisses++; - cacheStart = Math.max(start - cacheMissBackup, 0); cacheStop = Math.min(cacheStart + cacheSize, contigInfo.getSequenceLength()); cache = super.getSubsequenceAt(contig, cacheStart, cacheStop); diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java index e61d6f836..c39af0139 100644 --- a/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java @@ -148,7 +148,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest { new ArrayList(), false, false, - BAQ.Mode.NONE, null // no BAQ + BAQ.CalculationMode.NONE, BAQ.QualityMode.DONT_MODIFY, null // no BAQ ); } }