From 817ece37a20cf935a9f38cc27b7618e45f5e1dfd Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 31 Aug 2012 11:42:50 -0400 Subject: [PATCH] General infrastructure for ReadTransformers -- These are like read filters but can be applied either on input, on output, of handled by the walker -- Previous example of BAQ now uses the general framework -- Resulted in massive conceptual cleanup of SAMDataSource and ReadProperties! Yeah! -- BQSR now uses this framework. We can now do BQSR on input, on output, or within a walker -- PrintReads now handles all read transformers in the walker in map, enabling us to parallelize PrintReads with BAQ and BQSR -- Currently BQSR is excepting in parallel, which subsequent commit with fix -- Removed global variable setting in GenomeAnalysisEngine for BAQ, as command line parameters are cleanly handled by ReadTransformer infrastructure -- In principle ReadFilters are just a special kind of ReadTransformer, but this refactoring is larger than I can do. It's a JIRA entry -- Many files touched simply due to the refactoring and renaming of classes --- .../haplotypecaller/HaplotypeCaller.java | 14 +- .../sting/gatk/GenomeAnalysisEngine.java | 58 +++++-- .../sting/gatk/ReadProperties.java | 38 ++--- .../sting/gatk/WalkerManager.java | 9 +- .../gatk/datasources/reads/SAMDataSource.java | 41 ++--- .../gatk/io/stubs/SAMFileWriterStub.java | 40 +++-- .../sting/gatk/iterators/ReadTransformer.java | 144 ++++++++++++++++++ .../gatk/iterators/ReadTransformersMode.java | 28 ++++ .../sting/gatk/walkers/BAQMode.java | 4 +- .../sting/gatk/walkers/PrintReads.java | 20 ++- .../sting/gatk/walkers/Walker.java | 5 +- .../gatk/walkers/bqsr/BaseRecalibrator.java | 6 +- .../walkers/genotyper/UnifiedGenotyper.java | 3 +- .../gatk/walkers/indels/IndelRealigner.java | 3 +- .../indels/RealignerTargetCreator.java | 4 +- .../broadinstitute/sting/utils/baq/BAQ.java | 20 +-- .../sting/utils/baq/BAQReadTransformer.java | 49 ++++++ .../sting/utils/baq/BAQSamIterator.java | 59 ------- .../utils/baq/ReadTransformingIterator.java | 44 ++++++ .../sting/utils/recalibration/BQSRMode.java | 30 ++++ .../recalibration/BQSRReadTransformer.java | 40 +++++ .../utils/recalibration/BQSRSamIterator.java | 50 ------ 22 files changed, 485 insertions(+), 224 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformer.java create mode 100644 public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformersMode.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/baq/BAQReadTransformer.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/baq/ReadTransformingIterator.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRMode.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRReadTransformer.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRSamIterator.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 3d41b7233..f4d8a88e0 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -27,24 +27,23 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import com.google.java.contract.Ensures; import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.walkers.genotyper.*; -import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; -import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.BadMateFilter; +import org.broadinstitute.sting.gatk.iterators.ReadTransformer; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; +import org.broadinstitute.sting.gatk.walkers.genotyper.*; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.collections.Pair; @@ -52,6 +51,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.fragments.FragmentCollection; import org.broadinstitute.sting.utils.fragments.FragmentUtils; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -101,7 +101,7 @@ import java.util.*; @DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) @PartitionBy(PartitionType.LOCUS) -@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN) +@BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN) @ActiveRegionExtension(extension=65, maxRegion=300) public class HaplotypeCaller extends ActiveRegionWalker implements AnnotatorCompatible { diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 00614b9aa..b9b5e452d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -42,6 +42,8 @@ import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.io.stubs.Stub; +import org.broadinstitute.sting.gatk.iterators.ReadTransformer; +import org.broadinstitute.sting.gatk.iterators.ReadTransformersMode; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; @@ -49,8 +51,8 @@ import org.broadinstitute.sting.gatk.samples.SampleDB; import org.broadinstitute.sting.gatk.samples.SampleDBBuilder; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.GATKLiteUtils; +import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -131,6 +133,11 @@ public class GenomeAnalysisEngine { */ private Collection filters; + /** + * Collection of the read transformers applied to the reads + */ + private List readTransformers; + /** * Controls the allocation of threads between CPU vs IO. */ @@ -354,6 +361,39 @@ public class GenomeAnalysisEngine { return Collections.unmodifiableList(filters); } + /** + * Returns a list of active, initialized read transformers + * + * @param walker the walker we need to apply read transformers too + * @return a non-null list of read transformers + */ + public void initializeReadTransformers(final Walker walker) { + final List activeTransformers = new ArrayList(); + + final ReadTransformersMode overrideMode = WalkerManager.getWalkerAnnotation(walker, ReadTransformersMode.class); + final ReadTransformer.ApplicationTime overrideTime = overrideMode != null ? overrideMode.ApplicationTime() : null; + + final PluginManager pluginManager = new PluginManager(ReadTransformer.class); + + for ( final ReadTransformer transformer : pluginManager.createAllTypes() ) { + transformer.initialize(overrideTime, this, walker); + if ( transformer.enabled() ) + activeTransformers.add(transformer); + } + + setReadTransformers(activeTransformers); + } + + public List getReadTransformers() { + return readTransformers; + } + + private void setReadTransformers(final List readTransformers) { + if ( readTransformers == null ) + throw new ReviewedStingException("read transformers cannot be null"); + this.readTransformers = readTransformers; + } + /** * Parse out the thread allocation from the given command-line argument. */ @@ -419,9 +459,6 @@ public class GenomeAnalysisEngine { argCollection.setDownsamplingMethod(method); } - public BAQ.QualityMode getWalkerBAQQualityMode() { return WalkerManager.getBAQQualityMode(walker); } - public BAQ.ApplicationTime getWalkerBAQApplicationTime() { return WalkerManager.getBAQApplicationTime(walker); } - protected boolean includeReadsWithDeletionAtLoci() { return walker.includeReadsWithDeletionAtLoci(); } @@ -702,13 +739,12 @@ public class GenomeAnalysisEngine { protected void initializeDataSources() { logger.info("Strictness is " + argCollection.strictnessLevel); - // TODO -- REMOVE ME - BAQ.DEFAULT_GOP = argCollection.BAQGOP; - validateSuppliedReference(); setReferenceDataSource(argCollection.referenceFile); validateSuppliedReads(); + initializeReadTransformers(walker); + readsDataSource = createReadsDataSource(argCollection,genomeLocParser,referenceDataSource.getReference()); for (ReadFilter filter : filters) @@ -795,9 +831,6 @@ public class GenomeAnalysisEngine { // interrogating for the downsample method during command line recreation. setDownsamplingMethod(method); - if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF) - throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested."); - if (argCollection.removeProgramRecords && argCollection.keepProgramRecords) throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options"); @@ -817,11 +850,8 @@ public class GenomeAnalysisEngine { method, new ValidationExclusion(Arrays.asList(argCollection.unsafe)), filters, + readTransformers, includeReadsWithDeletionAtLoci(), - getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_INPUT ? argCollection.BAQMode : BAQ.CalculationMode.OFF, - getWalkerBAQQualityMode(), - refReader, - getBaseRecalibration(), argCollection.defaultBaseQualities, removeProgramRecords); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java b/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java index e02b9d5af..b2d4d202d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java +++ b/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java @@ -1,15 +1,14 @@ package org.broadinstitute.sting.gatk; -import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.filters.ReadFilter; -import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; +import org.broadinstitute.sting.gatk.iterators.ReadTransformer; import java.util.Collection; +import java.util.List; /** * User: hanna * Date: May 14, 2009 @@ -34,12 +33,9 @@ public class ReadProperties { private final DownsamplingMethod downsamplingMethod; private final ValidationExclusion exclusionList; private final Collection supplementalFilters; + private final List readTransformers; private final boolean includeReadsWithDeletionAtLoci; private final boolean useOriginalBaseQualities; - private final BAQ.CalculationMode cmode; - private final BAQ.QualityMode qmode; - private final IndexedFastaSequenceFile refReader; // read for BAQ, if desired - private final BaseRecalibration bqsrApplier; private final byte defaultBaseQualities; /** @@ -95,6 +91,11 @@ public class ReadProperties { return supplementalFilters; } + + public List getReadTransformers() { + return readTransformers; + } + /** * Return whether to use original base qualities. * @return Whether to use original base qualities. @@ -103,16 +104,6 @@ public class ReadProperties { return useOriginalBaseQualities; } - - public BAQ.QualityMode getBAQQualityMode() { return qmode; } - public BAQ.CalculationMode getBAQCalculationMode() { return cmode; } - - public IndexedFastaSequenceFile getRefReader() { - return refReader; - } - - public BaseRecalibration getBQSRApplier() { return bqsrApplier; } - /** * @return Default base quality value to fill reads missing base quality information. */ @@ -134,9 +125,6 @@ public class ReadProperties { * @param includeReadsWithDeletionAtLoci if 'true', the base pileups sent to the walker's map() method * will explicitly list reads with deletion over the current reference base; otherwise, only observed * bases will be seen in the pileups, and the deletions will be skipped silently. - * @param cmode How should we apply the BAQ calculation to the reads? - * @param qmode How should we apply the BAQ calculation to the reads? - * @param refReader if applyBAQ is true, must be a valid pointer to a indexed fasta file reads so we can get the ref bases for BAQ calculation * @param defaultBaseQualities if the reads have incomplete quality scores, set them all to defaultBaseQuality. */ public ReadProperties( Collection samFiles, @@ -146,11 +134,8 @@ public class ReadProperties { DownsamplingMethod downsamplingMethod, ValidationExclusion exclusionList, Collection supplementalFilters, + List readTransformers, boolean includeReadsWithDeletionAtLoci, - BAQ.CalculationMode cmode, - BAQ.QualityMode qmode, - IndexedFastaSequenceFile refReader, - BaseRecalibration bqsrApplier, byte defaultBaseQualities) { this.readers = samFiles; this.header = header; @@ -158,12 +143,9 @@ public class ReadProperties { this.downsamplingMethod = downsamplingMethod == null ? DownsamplingMethod.NONE : downsamplingMethod; this.exclusionList = exclusionList == null ? new ValidationExclusion() : exclusionList; this.supplementalFilters = supplementalFilters; + this.readTransformers = readTransformers; this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci; this.useOriginalBaseQualities = useOriginalBaseQualities; - this.cmode = cmode; - this.qmode = qmode; - this.refReader = refReader; - this.bqsrApplier = bqsrApplier; this.defaultBaseQualities = defaultBaseQualities; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java index 8843d4bfe..ae59ce438 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java @@ -29,13 +29,14 @@ import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.filters.ReadFilter; +import org.broadinstitute.sting.gatk.iterators.ReadTransformer; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.help.ResourceBundleExtractorDoclet; import org.broadinstitute.sting.utils.text.TextFormattingUtils; +import java.lang.annotation.Annotation; import java.util.*; /** @@ -319,11 +320,11 @@ public class WalkerManager extends PluginManager { return downsamplingMethod; } - public static BAQ.QualityMode getBAQQualityMode(Walker walker) { - return walker.getClass().getAnnotation(BAQMode.class).QualityMode(); + public static T getWalkerAnnotation(final Walker walker, final Class clazz) { + return walker.getClass().getAnnotation(clazz); } - public static BAQ.ApplicationTime getBAQApplicationTime(Walker walker) { + public static ReadTransformer.ApplicationTime getBAQApplicationTime(Walker walker) { return walker.getClass().getAnnotation(BAQMode.class).ApplicationTime(); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 2b88775b1..7d027438b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.datasources.reads; -import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.sam.MergingSamRecordIterator; import net.sf.picard.sam.SamFileHeaderMerger; import net.sf.samtools.*; @@ -42,12 +41,9 @@ import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.SimpleTimer; -import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.sting.utils.baq.BAQSamIterator; +import org.broadinstitute.sting.utils.baq.ReadTransformingIterator; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.recalibration.BQSRSamIterator; -import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory; import java.io.File; @@ -200,11 +196,8 @@ public class SAMDataSource { downsamplingMethod, exclusionList, supplementalFilters, + Collections.emptyList(), includeReadsWithDeletionAtLoci, - BAQ.CalculationMode.OFF, - BAQ.QualityMode.DONT_MODIFY, - null, // no BAQ - null, // no BQSR (byte) -1, false); } @@ -234,11 +227,8 @@ public class SAMDataSource { DownsamplingMethod downsamplingMethod, ValidationExclusion exclusionList, Collection supplementalFilters, + List readTransformers, boolean includeReadsWithDeletionAtLoci, - BAQ.CalculationMode cmode, - BAQ.QualityMode qmode, - IndexedFastaSequenceFile refReader, - BaseRecalibration bqsrApplier, byte defaultBaseQualities, boolean removeProgramRecords) { this.readMetrics = new ReadMetrics(); @@ -308,11 +298,8 @@ public class SAMDataSource { downsamplingMethod, exclusionList, supplementalFilters, + readTransformers, includeReadsWithDeletionAtLoci, - cmode, - qmode, - refReader, - bqsrApplier, defaultBaseQualities); // cache the read group id (original) -> read group id (merged) @@ -603,10 +590,7 @@ public class SAMDataSource { readProperties.getDownsamplingMethod().toFraction, readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), readProperties.getSupplementalFilters(), - readProperties.getBAQCalculationMode(), - readProperties.getBAQQualityMode(), - readProperties.getRefReader(), - readProperties.getBQSRApplier(), + readProperties.getReadTransformers(), readProperties.defaultBaseQualities()); } @@ -673,10 +657,7 @@ public class SAMDataSource { Double downsamplingFraction, Boolean noValidationOfReadOrder, Collection supplementalFilters, - BAQ.CalculationMode cmode, - BAQ.QualityMode qmode, - IndexedFastaSequenceFile refReader, - BaseRecalibration bqsrApplier, + List readTransformers, byte defaultBaseQualities) { // *********************************************************************************** // @@ -698,11 +679,11 @@ public class SAMDataSource { // only wrap if we are replacing the original qualities or using a default base quality wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities); - if (bqsrApplier != null) - wrappedIterator = new BQSRSamIterator(wrappedIterator, bqsrApplier); - - if (cmode != BAQ.CalculationMode.OFF) - wrappedIterator = new BAQSamIterator(refReader, wrappedIterator, cmode, qmode); + // set up read transformers + for ( final ReadTransformer readTransformer : readTransformers ) { + if ( readTransformer.enabled() && readTransformer.getApplicationTime() == ReadTransformer.ApplicationTime.ON_INPUT ) + wrappedIterator = new ReadTransformingIterator(wrappedIterator, readTransformer); + } return wrappedIterator; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java index d8e59a3dd..d2e7066e9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterStub.java @@ -31,12 +31,16 @@ import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; +import org.broadinstitute.sting.gatk.iterators.ReadTransformer; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.File; import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; /** * A stub for routing and management of SAM file reading and writing. @@ -116,15 +120,15 @@ public class SAMFileWriterStub implements Stub, StingSAMFileWrite */ private boolean simplifyBAM = false; + private List onOutputReadTransformers = null; + /** * Create a new stub given the requested SAM file and compression level. * @param engine source of header data, maybe other data about input files. * @param samFile SAM file to (ultimately) create. */ public SAMFileWriterStub( GenomeAnalysisEngine engine, File samFile ) { - this.engine = engine; - this.samFile = samFile; - this.samOutputStream = null; + this(engine, samFile, null); } /** @@ -133,8 +137,12 @@ public class SAMFileWriterStub implements Stub, StingSAMFileWrite * @param stream Output stream to which data should be written. */ public SAMFileWriterStub( GenomeAnalysisEngine engine, OutputStream stream ) { + this(engine, null, stream); + } + + private SAMFileWriterStub(final GenomeAnalysisEngine engine, final File samFile, final OutputStream stream) { this.engine = engine; - this.samFile = null; + this.samFile = samFile; this.samOutputStream = stream; } @@ -274,17 +282,29 @@ public class SAMFileWriterStub implements Stub, StingSAMFileWrite this.headerOverride = header; } + private void initializeReadTransformers() { + this.onOutputReadTransformers = new ArrayList(engine.getReadTransformers().size()); + for ( final ReadTransformer transformer : engine.getReadTransformers() ) { + if ( transformer.getApplicationTime() == ReadTransformer.ApplicationTime.ON_OUTPUT ) + onOutputReadTransformers.add(transformer); + } + } + /** * @{inheritDoc} */ - public void addAlignment( SAMRecord alignment ) { - if ( engine.getArguments().BAQMode != BAQ.CalculationMode.OFF && engine.getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_OUTPUT ) { - //System.out.printf("Writing BAQ at OUTPUT TIME%n"); - baqHMM.baqRead(alignment, engine.getReferenceDataSource().getReference(), engine.getArguments().BAQMode, engine.getWalkerBAQQualityMode()); - } + public void addAlignment( final SAMRecord readIn ) { + if ( onOutputReadTransformers == null ) + initializeReadTransformers(); + + GATKSAMRecord workingRead = (GATKSAMRecord)readIn; + + // run on output read transformers + for ( final ReadTransformer transform : onOutputReadTransformers ) + workingRead = transform.apply(workingRead); writeStarted = true; - outputTracker.getStorage(this).addAlignment(alignment); + outputTracker.getStorage(this).addAlignment(workingRead); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformer.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformer.java new file mode 100644 index 000000000..d307789f3 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformer.java @@ -0,0 +1,144 @@ +package org.broadinstitute.sting.gatk.iterators; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +/** + * Baseclass used to describe a read transformer like BAQ and BQSR + * + * Read transformers are plugable infrastructure that modify read state + * either on input, on output, or within walkers themselves. + * + * The function apply() is called on each read seen by the GATK (after passing + * all ReadFilters) and it can do as it sees fit (without modifying the alignment) + * to the read to change qualities, add tags, etc. + * + * Initialize is called once right before the GATK traversal begins providing + * the ReadTransformer with the ability to collect and initialize data from the + * engine. + * + * Note that all ReadTransformers within the classpath are created and initialized. If one + * shouldn't be run it should look at the command line options of the engine and override + * the enabled. + * + * @since 8/31/12 + * @author depristo + */ +abstract public class ReadTransformer { + /** + * When should this read transform be applied? + */ + private ApplicationTime applicationTime; + + /** + * Keep track of whether we've been initialized already, and ensure it's not called more than once. + */ + private boolean initialized = false; + + protected ReadTransformer() {} + + /** + * Master initialization routine. Called to setup a ReadTransform, using it's overloaded initialialSub routine. + * + * @param overrideTime if not null, we will run this ReadTransform at the time provided, regardless of the timing of this read transformer itself + * @param engine the engine, for initializing values + * @param walker the walker we intend to run + */ + @Requires({"initialized == false", "engine != null", "walker == null"}) + @Ensures("initialized == true") + public final void initialize(final ApplicationTime overrideTime, final GenomeAnalysisEngine engine, final Walker walker) { + if ( engine == null ) throw new IllegalArgumentException("engine cannot be null"); + if ( walker == null ) throw new IllegalArgumentException("walker cannot be null"); + + this.applicationTime = initializeSub(engine, walker); + if ( overrideTime != null ) this.applicationTime = overrideTime; + initialized = true; + } + + /** + * Subclasses must override this to initialize themeselves + * + * @param engine the engine, for initializing values + * @param walker the walker we intend to run + * @return the point of time we'd like this read transform to be run + */ + @Requires({"engine != null", "walker != null"}) + @Ensures("result != null") + protected abstract ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker); + + /** + * Should this ReadTransformer be activated? Called after initialize, which allows this + * read transformer to look at its arguments and decide if it should be active. All + * ReadTransformers must override this, as by default they are not enabled. + * + * @return true if this ReadTransformer should be used on the read stream + */ + public boolean enabled() { + return false; + } + + /** + * Has this transformer been initialized? + * + * @return true if it has + */ + public final boolean isInitialized() { + return initialized; + } + + /** + * When should we apply this read transformer? + * + * @return true if yes + */ + public final ApplicationTime getApplicationTime() { + return applicationTime; + } + + /** + * Primary interface function for a read transform to actually do some work + * + * The function apply() is called on each read seen by the GATK (after passing + * all ReadFilters) and it can do as it sees fit (without modifying the alignment) + * to the read to change qualities, add tags, etc. + * + * @param read the read to transform + * @return the transformed read + */ + @Requires("read != null") + @Ensures("result != null") + abstract public GATKSAMRecord apply(final GATKSAMRecord read); + + @Override + public String toString() { + return getClass().getSimpleName(); + } + + /** + * When should a read transformer be applied? + */ + public static enum ApplicationTime { + /** + * Walker does not tolerate this read transformer + */ + FORBIDDEN, + + /** + * apply the transformation to the incoming reads, the default + */ + ON_INPUT, + + /** + * apply the transformation to the outgoing read stream + */ + ON_OUTPUT, + + /** + * the walker will deal with the calculation itself + */ + HANDLED_IN_WALKER + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformersMode.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformersMode.java new file mode 100644 index 000000000..be227619f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformersMode.java @@ -0,0 +1,28 @@ +package org.broadinstitute.sting.gatk.iterators; + +import java.lang.annotation.*; + +/** + * User: hanna + * Date: May 14, 2009 + * Time: 1:51:22 PM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * Allows the walker to indicate what type of data it wants to consume. + */ + +@Documented +@Inherited +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.TYPE) +public @interface ReadTransformersMode { + public abstract ReadTransformer.ApplicationTime ApplicationTime() default ReadTransformer.ApplicationTime.ON_INPUT; +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/BAQMode.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/BAQMode.java index 03097887d..42582f178 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/BAQMode.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/BAQMode.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers; +import org.broadinstitute.sting.gatk.iterators.ReadTransformer; + import java.lang.annotation.*; /** @@ -25,5 +27,5 @@ import java.lang.annotation.*; @Target(ElementType.TYPE) public @interface BAQMode { public abstract org.broadinstitute.sting.utils.baq.BAQ.QualityMode QualityMode() default org.broadinstitute.sting.utils.baq.BAQ.QualityMode.OVERWRITE_QUALS; - public abstract org.broadinstitute.sting.utils.baq.BAQ.ApplicationTime ApplicationTime() default org.broadinstitute.sting.utils.baq.BAQ.ApplicationTime.ON_INPUT; + public abstract ReadTransformer.ApplicationTime ApplicationTime() default ReadTransformer.ApplicationTime.ON_INPUT; } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReads.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReads.java index 52ed20ef9..dca23ae66 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReads.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReads.java @@ -32,6 +32,8 @@ import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.iterators.ReadTransformer; +import org.broadinstitute.sting.gatk.iterators.ReadTransformersMode; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.baq.BAQ; @@ -91,7 +93,8 @@ import java.util.TreeSet; * */ @DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) -@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT) +@ReadTransformersMode(ApplicationTime = ReadTransformer.ApplicationTime.HANDLED_IN_WALKER) +@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = ReadTransformer.ApplicationTime.HANDLED_IN_WALKER) @Requires({DataSource.READS, DataSource.REFERENCE}) public class PrintReads extends ReadWalker implements ThreadSafeMapReduce { @@ -217,11 +220,20 @@ public class PrintReads extends ReadWalker impleme * The reads map function. * * @param ref the reference bases that correspond to our read, if a reference was provided - * @param read the read itself, as a GATKSAMRecord + * @param readIn the read itself, as a GATKSAMRecord * @return the read itself */ - public GATKSAMRecord map( ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker ) { - return simplifyReads ? read.simplify() : read; + public GATKSAMRecord map( ReferenceContext ref, GATKSAMRecord readIn, RefMetaDataTracker metaDataTracker ) { + GATKSAMRecord workingRead = readIn; + + for ( final ReadTransformer transformer : getToolkit().getReadTransformers() ) { + if ( logger.isDebugEnabled() ) logger.debug("Applying transformer " + transformer + " to read " + readIn.getReadName()); + workingRead = transformer.apply(workingRead); + } + + if ( simplifyReads ) workingRead = workingRead.simplify(); + + return workingRead; } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index 6cd2e8aea..4478f8515 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -30,12 +30,14 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.filters.MalformedReadFilter; +import org.broadinstitute.sting.gatk.iterators.ReadTransformer; import org.broadinstitute.sting.gatk.samples.Sample; import org.broadinstitute.sting.gatk.samples.SampleDB; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.recalibration.BQSRMode; import java.util.List; @@ -48,7 +50,8 @@ import java.util.List; */ @ReadFilters(MalformedReadFilter.class) @PartitionBy(PartitionType.NONE) -@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT) +@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT) +@BQSRMode(ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT) @DocumentedGATKFeature(groupName = "Uncategorized", extraDocs = {CommandLineGATK.class}) public abstract class Walker { final protected static Logger logger = Logger.getLogger(Walker.class); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index 30d2e24ef..443b493be 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -32,10 +32,9 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter; import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter; +import org.broadinstitute.sting.gatk.iterators.ReadTransformer; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; -import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.GATKLiteUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -46,6 +45,7 @@ import org.broadinstitute.sting.utils.recalibration.QuantizationInfo; import org.broadinstitute.sting.utils.recalibration.RecalUtils; import org.broadinstitute.sting.utils.recalibration.RecalibrationReport; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; +import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -104,7 +104,7 @@ import java.util.ArrayList; */ @DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) -@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN) +@BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN) @By(DataSource.READS) @ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file @Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 507806fbe..93928a780 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.BadMateFilter; import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter; +import org.broadinstitute.sting.gatk.iterators.ReadTransformer; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; @@ -117,7 +118,7 @@ import java.util.*; */ @DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) -@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT) +@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT) @ReadFilters( {BadMateFilter.class, MappingQualityUnavailableFilter.class} ) @Reference(window=@Window(start=-200,stop=200)) @By(DataSource.REFERENCE) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index d9b71f938..76d8d85c2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -36,6 +36,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; +import org.broadinstitute.sting.gatk.iterators.ReadTransformer; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.BAQMode; import org.broadinstitute.sting.gatk.walkers.ReadWalker; @@ -111,7 +112,7 @@ import java.util.*; * @author ebanks */ @DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) -@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT) +@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = ReadTransformer.ApplicationTime.ON_OUTPUT) public class IndelRealigner extends ReadWalker { public static final String ORIGINAL_CIGAR_TAG = "OC"; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index fc6df6902..a52d57031 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -33,10 +33,10 @@ import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.*; +import org.broadinstitute.sting.gatk.iterators.ReadTransformer; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -101,7 +101,7 @@ import java.util.TreeSet; @Reference(window=@Window(start=-1,stop=50)) @Allows(value={DataSource.READS, DataSource.REFERENCE}) @By(DataSource.REFERENCE) -@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN) +@BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN) public class RealignerTargetCreator extends RodWalker implements TreeReducible { /** diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java index 439a0d8ed..cf4d699ee 100644 --- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java +++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java @@ -52,13 +52,6 @@ public class BAQ { DONT_MODIFY // do the BAQ, but don't modify the quality scores themselves, just return them in the function. } - public enum ApplicationTime { - FORBIDDEN, // Walker does not tolerate BAQ input - ON_INPUT, // apply the BAQ calculation to the incoming reads, the default - ON_OUTPUT, // apply the BAQ calculation to outgoing read streams - HANDLED_IN_WALKER // the walker will deal with the BAQ calculation status itself - } - public static final String BAQ_TAG = "BQ"; private static double[] qual2prob = new double[256]; @@ -68,7 +61,7 @@ public class BAQ { } // Phred scaled now (changed 1/10/2011) - public static double DEFAULT_GOP = 40; + public static final double DEFAULT_GOP = 40; /* Takes a Phred Scale quality score and returns the error probability. * @@ -110,10 +103,19 @@ public class BAQ { * Use defaults for everything */ public BAQ() { - cd = convertFromPhredScale(DEFAULT_GOP); + this(DEFAULT_GOP); + } + + /** + * Use defaults for everything + */ + public BAQ(final double gapOpenPenalty) { + cd = convertFromPhredScale(gapOpenPenalty); initializeCachedData(); } + + /** * Create a new HmmGlocal object with specified parameters * diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQReadTransformer.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQReadTransformer.java new file mode 100644 index 000000000..4589ffb71 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQReadTransformer.java @@ -0,0 +1,49 @@ +package org.broadinstitute.sting.utils.baq; + +import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.WalkerManager; +import org.broadinstitute.sting.gatk.iterators.ReadTransformer; +import org.broadinstitute.sting.gatk.walkers.BAQMode; +import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +/** + * Applies Heng's BAQ calculation to a stream of incoming reads + */ +public class BAQReadTransformer extends ReadTransformer { + private BAQ baqHMM; + private IndexedFastaSequenceFile refReader; + private BAQ.CalculationMode cmode; + private BAQ.QualityMode qmode; + + @Override + public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) { + final BAQMode mode = WalkerManager.getWalkerAnnotation(walker, BAQMode.class); + this.refReader = engine.getReferenceDataSource().getReference(); + this.cmode = engine.getArguments().BAQMode; + this.qmode = mode.QualityMode(); + baqHMM = new BAQ(engine.getArguments().BAQGOP); + + if ( qmode == BAQ.QualityMode.DONT_MODIFY ) + throw new ReviewedStingException("BUG: shouldn't create BAQ transformer with quality mode DONT_MODIFY"); + + if ( mode.ApplicationTime() == ReadTransformer.ApplicationTime.FORBIDDEN && enabled() ) + throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + cmode + " was requested."); + + return mode.ApplicationTime(); + } + + @Override + public boolean enabled() { + return cmode != BAQ.CalculationMode.OFF; + } + + @Override + public GATKSAMRecord apply(final GATKSAMRecord read) { + baqHMM.baqRead(read, refReader, cmode, qmode); + return read; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java deleted file mode 100644 index adfeef518..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQSamIterator.java +++ /dev/null @@ -1,59 +0,0 @@ -package org.broadinstitute.sting.utils.baq; - -import com.google.java.contract.Ensures; -import com.google.java.contract.Requires; -import net.sf.picard.reference.IndexedFastaSequenceFile; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -import java.util.Iterator; - -/** - * Simple iterator that applies Heng's BAQ calculation to a stream of incoming reads - */ -public class BAQSamIterator implements StingSAMIterator { - private final StingSAMIterator it; - private final BAQ baqHMM = new BAQ(); // creates a BAQ creator with default parameters - private final IndexedFastaSequenceFile refReader; - private final BAQ.CalculationMode cmode; - private final BAQ.QualityMode qmode; - - /** - * Creates a new BAMSamIterator using the reference getter refReader and applies the BAM to the reads coming - * in from it. See BAQ docs for baqType information. - * - * @param refReader - * @param it - * @param cmode - * @param qmode - */ - @Requires({ - "refReader != null", - "it != null", - "cmode != null" , - "qmode != null"}) - public BAQSamIterator(IndexedFastaSequenceFile refReader, StingSAMIterator it, BAQ.CalculationMode cmode, BAQ.QualityMode qmode) { - if ( cmode == BAQ.CalculationMode.OFF ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with calculation mode OFF"); - if ( qmode == BAQ.QualityMode.DONT_MODIFY ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with quailty mode DONT_MODIFY"); - - this.refReader = refReader; - this.it = it; - this.cmode = cmode; - this.qmode = qmode; - } - - @Requires("hasNext()") - @Ensures("result != null") - public SAMRecord next() { - //System.out.printf("BAQing during input%n"); - SAMRecord read = it.next(); - baqHMM.baqRead(read, refReader, cmode, qmode); - return read; - } - - public boolean hasNext() { return this.it.hasNext(); } - public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); } - public void close() { it.close(); } - public Iterator iterator() { return this; } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/ReadTransformingIterator.java b/public/java/src/org/broadinstitute/sting/utils/baq/ReadTransformingIterator.java new file mode 100644 index 000000000..028e75226 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/baq/ReadTransformingIterator.java @@ -0,0 +1,44 @@ +package org.broadinstitute.sting.utils.baq; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.iterators.ReadTransformer; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +import java.util.Iterator; + +/** + * Iterator that applies a ReadTransformer to a stream of reads + */ +public class ReadTransformingIterator implements StingSAMIterator { + private final StingSAMIterator it; + private final ReadTransformer transformer; + + /** + * Creates a new ReadTransforming iterator + */ + @Requires({"it != null", "engine != null", "transformer != null", "transformer.isInitialized()"}) + public ReadTransformingIterator(final StingSAMIterator it, final ReadTransformer transformer) { + if ( ! transformer.isInitialized() ) + throw new IllegalStateException("Creating a read transformer stream for an uninitialized read transformer: " + transformer); + if ( transformer.getApplicationTime() == ReadTransformer.ApplicationTime.FORBIDDEN ) + throw new IllegalStateException("Creating a read transformer stream for a forbidden transformer " + transformer); + + this.it = it; + this.transformer = transformer; + } + + @Requires("hasNext()") + @Ensures("result != null") + public SAMRecord next() { + final GATKSAMRecord read = (GATKSAMRecord)it.next(); + return transformer.apply(read); + } + + public boolean hasNext() { return this.it.hasNext(); } + public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); } + public void close() { it.close(); } + public Iterator iterator() { return this; } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRMode.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRMode.java new file mode 100644 index 000000000..431014032 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRMode.java @@ -0,0 +1,30 @@ +package org.broadinstitute.sting.utils.recalibration; + +import org.broadinstitute.sting.gatk.iterators.ReadTransformer; + +import java.lang.annotation.*; + +/** + * User: hanna + * Date: May 14, 2009 + * Time: 1:51:22 PM + * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT + * Software and documentation are copyright 2005 by the Broad Institute. + * All rights are reserved. + * + * Users acknowledge that this software is supplied without any warranty or support. + * The Broad Institute is not responsible for its use, misuse, or + * functionality. + */ + +/** + * Allows the walker to indicate what type of data it wants to consume. + */ + +@Documented +@Inherited +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.TYPE) +public @interface BQSRMode { + public abstract ReadTransformer.ApplicationTime ApplicationTime() default ReadTransformer.ApplicationTime.ON_INPUT; +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRReadTransformer.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRReadTransformer.java new file mode 100644 index 000000000..fae0e8c09 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRReadTransformer.java @@ -0,0 +1,40 @@ +package org.broadinstitute.sting.utils.recalibration; + +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.WalkerManager; +import org.broadinstitute.sting.gatk.iterators.ReadTransformer; +import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +/** + * A ReadTransformer that applies BQSR on the fly to reads + * + * User: rpoplin + * Date: 2/13/12 + */ +public class BQSRReadTransformer extends ReadTransformer { + private boolean enabled; + private BaseRecalibration bqsr; + + @Override + public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) { + this.enabled = engine.hasBaseRecalibration(); + this.bqsr = engine.getBaseRecalibration(); + final BQSRMode mode = WalkerManager.getWalkerAnnotation(walker, BQSRMode.class); + return mode.ApplicationTime(); + } + + @Override + public boolean enabled() { + return enabled; + } + + /** + * initialize a new BQSRReadTransformer that applies BQSR on the fly to incoming reads. + */ + @Override + public GATKSAMRecord apply(GATKSAMRecord read) { + bqsr.recalibrateRead(read); + return read; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRSamIterator.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRSamIterator.java deleted file mode 100644 index 048f8e58c..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRSamIterator.java +++ /dev/null @@ -1,50 +0,0 @@ -package org.broadinstitute.sting.utils.recalibration; - -import com.google.java.contract.Ensures; -import com.google.java.contract.Requires; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -import java.util.Iterator; - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: 2/13/12 - */ - -public class BQSRSamIterator implements StingSAMIterator { - private final StingSAMIterator it; - private final BaseRecalibration bqsr; - - /** - * Creates a new BQSRSamIterator and applies BQSR on the fly to incoming reads. - * - * @param it The incoming SamIterator to wrap - * @param bqsr The object which holds the BQSR table information and knows how to apply it - */ - @Requires({ - "it != null", - "bqsr != null"}) - public BQSRSamIterator(StingSAMIterator it, BaseRecalibration bqsr) { - if ( bqsr == null ) throw new ReviewedStingException("BUG: shouldn't create BQSRSamIterator with null recalibration object"); - - this.it = it; - this.bqsr = bqsr; - } - - @Requires("hasNext()") - @Ensures("result != null") - public SAMRecord next() { - SAMRecord read = it.next(); - bqsr.recalibrateRead((GATKSAMRecord) read); - return read; - } - - public boolean hasNext() { return this.it.hasNext(); } - public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); } - public void close() { it.close(); } - public Iterator iterator() { return this; } -}