diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index c0db75aa9..50ef4653b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -185,12 +185,12 @@ public class GenomeAnalysisEngine { public static void resetRandomGenerator(long seed) { randomGenerator.setSeed(seed); } /** - * Static base quality score recalibration helper object + * Base Quality Score Recalibration helper object */ - private static BaseRecalibration baseRecalibration = null; - public static BaseRecalibration getBaseRecalibration() { return baseRecalibration; } - public static boolean hasBaseRecalibration() { return baseRecalibration != null; } - public static void setBaseRecalibration(File recalFile) { baseRecalibration = new BaseRecalibration(recalFile); } + private BaseRecalibration baseRecalibration = null; + public BaseRecalibration getBaseRecalibration() { return baseRecalibration; } + public boolean hasBaseRecalibration() { return baseRecalibration != null; } + public void setBaseRecalibration(File recalFile) { baseRecalibration = new BaseRecalibration(recalFile); } /** * Actually run the GATK with the specified walker. @@ -770,6 +770,7 @@ public class GenomeAnalysisEngine { getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_INPUT ? argCollection.BAQMode : BAQ.CalculationMode.OFF, getWalkerBAQQualityMode(), refReader, + getBaseRecalibration(), argCollection.defaultBaseQualities); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java b/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java index daa8ff60d..db22886ce 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java +++ b/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java @@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; import java.util.Collection; /** @@ -27,23 +28,20 @@ import java.util.Collection; * information about how they should be downsampled, sorted, and filtered. */ public class ReadProperties { - private Collection readers = null; - private SAMFileHeader header = null; - private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT; - private DownsamplingMethod downsamplingMethod = null; - private ValidationExclusion exclusionList = null; - private Collection supplementalFilters = null; - private boolean includeReadsWithDeletionAtLoci = false; - private boolean useOriginalBaseQualities = false; - private boolean generateExtendedEvents = false; - private BAQ.CalculationMode cmode = BAQ.CalculationMode.OFF; - private BAQ.QualityMode qmode = BAQ.QualityMode.DONT_MODIFY; - IndexedFastaSequenceFile refReader = null; // read for BAQ, if desired - private byte defaultBaseQualities; - - // do we want to generate additional piles of "extended" events (indels) -// immediately after the reference base such event is associated with? - + private final Collection readers; + private final SAMFileHeader header; + private final SAMFileReader.ValidationStringency validationStringency; + private final DownsamplingMethod downsamplingMethod; + private final ValidationExclusion exclusionList; + private final Collection supplementalFilters; + private final boolean includeReadsWithDeletionAtLoci; + private final boolean useOriginalBaseQualities; + private final boolean generateExtendedEvents; + private final BAQ.CalculationMode cmode; + private final BAQ.QualityMode qmode; + private final IndexedFastaSequenceFile refReader; // read for BAQ, if desired + private final BaseRecalibration bqsrApplier; + private final byte defaultBaseQualities; /** * Return true if the walker wants to see reads that contain deletions when looking at locus pileups @@ -126,6 +124,8 @@ public class ReadProperties { return refReader; } + public BaseRecalibration getBQSRApplier() { return bqsrApplier; } + /** * @return Default base quality value to fill reads missing base quality information. */ @@ -165,8 +165,9 @@ public class ReadProperties { boolean includeReadsWithDeletionAtLoci, boolean generateExtendedEvents, BAQ.CalculationMode cmode, - BAQ.QualityMode qmode, + BAQ.QualityMode qmode, IndexedFastaSequenceFile refReader, + BaseRecalibration bqsrApplier, byte defaultBaseQualities) { this.readers = samFiles; this.header = header; @@ -180,6 +181,7 @@ public class ReadProperties { this.cmode = cmode; this.qmode = qmode; this.refReader = refReader; + this.bqsrApplier = bqsrApplier; this.defaultBaseQualities = defaultBaseQualities; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 27b9e7f77..70284b2a6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -46,6 +46,8 @@ import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQSamIterator; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.recalibration.BQSRSamIterator; +import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory; import java.io.File; @@ -201,6 +203,7 @@ public class SAMDataSource { BAQ.CalculationMode.OFF, BAQ.QualityMode.DONT_MODIFY, null, // no BAQ + null, // no BQSR (byte) -1); } @@ -237,6 +240,7 @@ public class SAMDataSource { BAQ.CalculationMode cmode, BAQ.QualityMode qmode, IndexedFastaSequenceFile refReader, + BaseRecalibration bqsrApplier, byte defaultBaseQualities) { this.readMetrics = new ReadMetrics(); this.genomeLocParser = genomeLocParser; @@ -309,6 +313,7 @@ public class SAMDataSource { cmode, qmode, refReader, + bqsrApplier, defaultBaseQualities); // cache the read group id (original) -> read group id (merged) @@ -591,6 +596,7 @@ public class SAMDataSource { readProperties.getBAQCalculationMode(), readProperties.getBAQQualityMode(), readProperties.getRefReader(), + readProperties.getBQSRApplier(), readProperties.defaultBaseQualities()); } @@ -660,9 +666,10 @@ public class SAMDataSource { BAQ.CalculationMode cmode, BAQ.QualityMode qmode, IndexedFastaSequenceFile refReader, + BaseRecalibration bqsrApplier, byte defaultBaseQualities) { - if ( useOriginalBaseQualities || defaultBaseQualities >= 0 ) - // only wrap if we are replacing the original qualitiies or using a default base quality + if (useOriginalBaseQualities || defaultBaseQualities >= 0) + // only wrap if we are replacing the original qualities or using a default base quality wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities); // NOTE: this (and other filtering) should be done before on-the-fly sorting @@ -675,6 +682,9 @@ public class SAMDataSource { if (!noValidationOfReadOrder && enableVerification) wrappedIterator = new VerifyingSamIterator(genomeLocParser,wrappedIterator); + if (bqsrApplier != null) + wrappedIterator = new BQSRSamIterator(wrappedIterator, bqsrApplier); + if (cmode != BAQ.CalculationMode.OFF) wrappedIterator = new BAQSamIterator(refReader, wrappedIterator, cmode, qmode); diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 70fe43755..92c508f85 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -68,7 +68,7 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] > MIN_QUAL_BAD_OVERLAP && firstReadBases[iii] != secondReadBases[iii-firstReadStop] ) { @@ -164,16 +175,22 @@ public class FragmentUtils { } bases[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadBases[iii] : secondReadBases[iii-firstReadStop] ); quals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadQuals[iii] : secondReadQuals[iii-firstReadStop] ); + insertionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadInsertionQuals[iii] : secondReadInsertionQuals[iii-firstReadStop] ); // Purposefully checking the highest base quality score + deletionQuals[iii] = ( firstReadQuals[iii] > secondReadQuals[iii-firstReadStop] ? firstReadDeletionQuals[iii] : secondReadDeletionQuals[iii-firstReadStop] ); // Purposefully checking the highest base quality score } for(int iii = firstRead.getReadLength(); iii < numBases; iii++) { bases[iii] = secondReadBases[iii-firstReadStop]; quals[iii] = secondReadQuals[iii-firstReadStop]; + insertionQuals[iii] = secondReadInsertionQuals[iii-firstReadStop]; + deletionQuals[iii] = secondReadDeletionQuals[iii-firstReadStop]; } final GATKSAMRecord returnRead = new GATKSAMRecord(firstRead.getHeader()); returnRead.setAlignmentStart(firstRead.getUnclippedStart()); returnRead.setReadBases( bases ); - returnRead.setBaseQualities( quals ); + returnRead.setBaseQualities( quals, RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION ); + returnRead.setBaseQualities( insertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION ); + returnRead.setBaseQualities( deletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION ); returnRead.setReadGroup( firstRead.getReadGroup() ); returnRead.setReferenceName( firstRead.getReferenceName() ); final CigarElement c = new CigarElement(bases.length, CigarOperator.M); diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRSamIterator.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRSamIterator.java new file mode 100644 index 000000000..048f8e58c --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BQSRSamIterator.java @@ -0,0 +1,50 @@ +package org.broadinstitute.sting.utils.recalibration; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +import java.util.Iterator; + +/** + * Created by IntelliJ IDEA. + * User: rpoplin + * Date: 2/13/12 + */ + +public class BQSRSamIterator implements StingSAMIterator { + private final StingSAMIterator it; + private final BaseRecalibration bqsr; + + /** + * Creates a new BQSRSamIterator and applies BQSR on the fly to incoming reads. + * + * @param it The incoming SamIterator to wrap + * @param bqsr The object which holds the BQSR table information and knows how to apply it + */ + @Requires({ + "it != null", + "bqsr != null"}) + public BQSRSamIterator(StingSAMIterator it, BaseRecalibration bqsr) { + if ( bqsr == null ) throw new ReviewedStingException("BUG: shouldn't create BQSRSamIterator with null recalibration object"); + + this.it = it; + this.bqsr = bqsr; + } + + @Requires("hasNext()") + @Ensures("result != null") + public SAMRecord next() { + SAMRecord read = it.next(); + bqsr.recalibrateRead((GATKSAMRecord) read); + return read; + } + + public boolean hasNext() { return this.it.hasNext(); } + public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); } + public void close() { it.close(); } + public Iterator iterator() { return this; } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index 2c1bc494a..b08365a78 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.text.XReadLines; import java.io.File; import java.io.FileNotFoundException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.regex.Pattern; @@ -165,10 +166,8 @@ public class BaseRecalibration { key[iii] = cov.getValue( vals[iii] ); } final String modelString = vals[iii++]; - final RecalDataManager.BaseRecalibrationType errorModel = ( modelString.equals(CovariateKeySet.mismatchesCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION : - ( modelString.equals(CovariateKeySet.insertionsCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_INSERTION : - ( modelString.equals(CovariateKeySet.deletionsCovariateName) ? RecalDataManager.BaseRecalibrationType.BASE_DELETION : null ) ) ); - + final RecalDataManager.BaseRecalibrationType errorModel = CovariateKeySet.getErrorModelFromString(modelString); + // Create a new datum using the number of observations, number of mismatches, and reported quality score final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 ); // Add that datum to all the collapsed tables which will be used in the sequential calculation @@ -183,19 +182,16 @@ public class BaseRecalibration { final CovariateKeySet covariateKeySet = RecalDataManager.getAllCovariateValuesFor( read ); for( final RecalDataManager.BaseRecalibrationType errorModel : RecalDataManager.BaseRecalibrationType.values() ) { - final byte[] originalQuals = ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION ? read.getBaseQualities() : - ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_INSERTION ? read.getBaseDeletionQualities() : - ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_DELETION ? read.getBaseDeletionQualities() : null ) ) ); + final byte[] originalQuals = read.getBaseQualities( errorModel ); final byte[] recalQuals = originalQuals.clone(); // For each base in the read for( int offset = 0; offset < read.getReadLength(); offset++ ) { - final Object[] fullCovariateKey = - ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_SUBSTITUTION ? covariateKeySet.getMismatchesKeySet(offset) : - ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_INSERTION ? covariateKeySet.getInsertionsKeySet(offset) : - ( errorModel == RecalDataManager.BaseRecalibrationType.BASE_DELETION ? covariateKeySet.getDeletionsKeySet(offset) : null ) ) ); - + final Object[] fullCovariateKeyWithErrorMode = covariateKeySet.getKeySet(offset, errorModel); + + final Object[] fullCovariateKey = Arrays.copyOfRange(fullCovariateKeyWithErrorMode, 0, fullCovariateKeyWithErrorMode.length-1); // need to strip off the error mode which was appended to the list of covariates + Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey); if( qualityScore == null ) { qualityScore = performSequentialQualityCalculation( errorModel, fullCovariateKey ); @@ -206,21 +202,8 @@ public class BaseRecalibration { } preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low - switch (errorModel) { - case BASE_SUBSTITUTION: - read.setBaseQualities( recalQuals ); - break; - case BASE_INSERTION: - read.setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, recalQuals ); - break; - case BASE_DELETION: - read.setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, recalQuals ); - break; - default: - throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel ); - } + read.setBaseQualities( recalQuals, errorModel ); } - } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index f6b3d759c..2172cfb94 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -25,9 +25,9 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.*; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDataManager; import org.broadinstitute.sting.utils.NGSPlatform; -import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.Arrays; import java.util.HashMap; @@ -163,8 +163,37 @@ public class GATKSAMRecord extends BAMRecord { } /** - * Accessors for base insertion and base deletion quality scores + * Setters and Accessors for base insertion and base deletion quality scores */ + public void setBaseQualities( final byte[] quals, final RecalDataManager.BaseRecalibrationType errorModel ) { + switch( errorModel ) { + case BASE_SUBSTITUTION: + setBaseQualities(quals); + break; + case BASE_INSERTION: + setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, quals ); + break; + case BASE_DELETION: + setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, quals ); + break; + default: + throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel ); + } + } + + public byte[] getBaseQualities( final RecalDataManager.BaseRecalibrationType errorModel ) { + switch( errorModel ) { + case BASE_SUBSTITUTION: + return getBaseQualities(); + case BASE_INSERTION: + return getBaseInsertionQualities(); + case BASE_DELETION: + return getBaseDeletionQualities(); + default: + throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel ); + } + } + public byte[] getBaseInsertionQualities() { byte[] quals = getByteArrayAttribute( BQSR_BASE_INSERTION_QUALITIES ); if( quals == null ) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java index 5da8cebf4..20f3e1e35 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java @@ -79,7 +79,8 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark { false, BAQ.CalculationMode.OFF, BAQ.QualityMode.DONT_MODIFY, - null, + null, // no BAQ + null, // no BQSR (byte)0); GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary()); diff --git a/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java index 4011594f3..04e11db54 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java @@ -308,6 +308,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest { BAQ.CalculationMode.OFF, BAQ.QualityMode.DONT_MODIFY, null, // no BAQ + null, // no BQSR (byte) -1 ); }