diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java index ac280b70e..b56e5cc16 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java @@ -28,41 +28,22 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource; import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.recalibration.ReadCovariates; -import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; -import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.threading.ThreadLocalArray; public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource { - - // optimization: only allocate temp arrays once per thread - private final ThreadLocal threadLocalTempQualArray = new ThreadLocalArray(EventType.values().length, byte.class); - private final ThreadLocal threadLocalTempFractionalErrorArray = new ThreadLocalArray(EventType.values().length, double.class); - - public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) { - super.initialize(covariates, recalibrationTables); - } - @Override - public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) { - final ReadCovariates readCovariates = covariateKeySetFrom(read); - byte[] tempQualArray = threadLocalTempQualArray.get(); - double[] tempFractionalErrorArray = threadLocalTempFractionalErrorArray.get(); + public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) { + final GATKSAMRecord read = recalInfo.getRead(); + final ReadCovariates readCovariates = recalInfo.getCovariatesValues(); for( int offset = 0; offset < read.getReadBases().length; offset++ ) { - if( !skip[offset] ) { - tempQualArray[EventType.BASE_SUBSTITUTION.index] = read.getBaseQualities()[offset]; - tempFractionalErrorArray[EventType.BASE_SUBSTITUTION.index] = snpErrors[offset]; - tempQualArray[EventType.BASE_INSERTION.index] = read.getBaseInsertionQualities()[offset]; - tempFractionalErrorArray[EventType.BASE_INSERTION.index] = insertionErrors[offset]; - tempQualArray[EventType.BASE_DELETION.index] = read.getBaseDeletionQualities()[offset]; - tempFractionalErrorArray[EventType.BASE_DELETION.index] = deletionErrors[offset]; + if( ! recalInfo.skip(offset) ) { for (final EventType eventType : EventType.values()) { final int[] keys = readCovariates.getKeySet(offset, eventType); final int eventIndex = eventType.index; - final byte qual = tempQualArray[eventIndex]; - final double isError = tempFractionalErrorArray[eventIndex]; + final byte qual = recalInfo.getQual(eventType, offset); + final double isError = recalInfo.getErrorFraction(eventType, offset); incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index 6931235b8..8a71872c1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -225,19 +225,22 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche if (!RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) { // parse the solid color space and check for color no-calls return 0L; // skip this read completely } - read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates)); - final boolean[] skip = calculateSkipArray(read, metaDataTracker); // skip known sites of variation as well as low quality and non-regular bases - final int[] isSNP = calculateIsSNP(read, ref, originalRead); - final int[] isInsertion = calculateIsIndel(read, EventType.BASE_INSERTION); - final int[] isDeletion = calculateIsIndel(read, EventType.BASE_DELETION); final byte[] baqArray = calculateBAQArray(read); if( baqArray != null ) { // some reads just can't be BAQ'ed + final ReadCovariates covariates = RecalUtils.computeCovariates(read, requestedCovariates); + final boolean[] skip = calculateSkipArray(read, metaDataTracker); // skip known sites of variation as well as low quality and non-regular bases + final int[] isSNP = calculateIsSNP(read, ref, originalRead); + final int[] isInsertion = calculateIsIndel(read, EventType.BASE_INSERTION); + final int[] isDeletion = calculateIsIndel(read, EventType.BASE_DELETION); final double[] snpErrors = calculateFractionalErrorArray(isSNP, baqArray); final double[] insertionErrors = calculateFractionalErrorArray(isInsertion, baqArray); final double[] deletionErrors = calculateFractionalErrorArray(isDeletion, baqArray); - recalibrationEngine.updateDataForRead(read, skip, snpErrors, insertionErrors, deletionErrors); + + // aggregrate all of the info into our info object, and update the data + final ReadRecalibrationInfo info = new ReadRecalibrationInfo(read, covariates, skip, snpErrors, insertionErrors, deletionErrors); + recalibrationEngine.updateDataForRead(info); return 1L; } else { return 0L; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfo.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfo.java new file mode 100644 index 000000000..f8d3c4281 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfo.java @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.bqsr; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import org.broadinstitute.sting.utils.recalibration.EventType; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +/** + * Created with IntelliJ IDEA. + * User: depristo + * Date: 12/18/12 + * Time: 3:50 PM + * + * TODO -- merge in ReadCovariates? + */ +public final class ReadRecalibrationInfo { + private final GATKSAMRecord read; + private final int length; + private final ReadCovariates covariates; + private final boolean[] skips; + private final byte[] baseQuals, insertionQuals, deletionQuals; + private final double[] snpErrors, insertionErrors, deletionErrors; + + public ReadRecalibrationInfo(final GATKSAMRecord read, + final ReadCovariates covariates, + final boolean[] skips, + final double[] snpErrors, + final double[] insertionErrors, + final double[] deletionErrors) { + if ( read == null ) throw new IllegalArgumentException("read cannot be null"); + if ( covariates == null ) throw new IllegalArgumentException("covariates cannot be null"); + if ( skips == null ) throw new IllegalArgumentException("skips cannot be null"); + if ( snpErrors == null ) throw new IllegalArgumentException("snpErrors cannot be null"); + // future: may allow insertionErrors && deletionErrors to be null, so don't enforce + + this.read = read; + this.baseQuals = read.getBaseQualities(); + this.length = baseQuals.length; + this.covariates = covariates; + this.skips = skips; + this.insertionQuals = read.getExistingBaseInsertionQualities(); + this.deletionQuals = read.getExistingBaseDeletionQualities(); + this.snpErrors = snpErrors; + this.insertionErrors = insertionErrors; + this.deletionErrors = deletionErrors; + + if ( skips.length != length ) throw new IllegalArgumentException("skips.length " + snpErrors.length + " != length " + length); + if ( snpErrors.length != length ) throw new IllegalArgumentException("snpErrors.length " + snpErrors.length + " != length " + length); + if ( insertionErrors != null && insertionErrors.length != length ) throw new IllegalArgumentException("insertionErrors.length " + snpErrors.length + " != length " + length); + if ( deletionErrors != null && deletionErrors.length != length ) throw new IllegalArgumentException("deletionErrors.length " + snpErrors.length + " != length " + length); + } + + /** + * Get the qual score for event type at offset + * + * @param eventType the type of event we want the qual for + * @param offset the offset into this read for the qual + * @return a valid quality score for event at offset + */ + @Requires("validOffset(offset)") + @Ensures("result >= 0 && result <= QualityUtils.MAX_QUAL_SCORE") + public byte getQual(final EventType eventType, final int offset) { + switch ( eventType ) { + case BASE_SUBSTITUTION: return baseQuals[offset]; + // note optimization here -- if we don't have ins/del quals we just return the default byte directly + case BASE_INSERTION: return insertionQuals == null ? GATKSAMRecord.DEFAULT_INSERTION_DELETION_QUAL : insertionQuals[offset]; + case BASE_DELETION: return deletionQuals == null ? GATKSAMRecord.DEFAULT_INSERTION_DELETION_QUAL : deletionQuals[offset]; + default: throw new IllegalStateException("Unknown event type " + eventType); + } + } + + /** + * Get the error fraction for event type at offset + * + * The error fraction is a value between 0 and 1 that indicates how much certainty we have + * in the error occurring at offset. A value of 1 means that the error definitely occurs at this + * site, a value of 0.0 means it definitely doesn't happen here. 0.5 means that half the weight + * of the error belongs here + * + * @param eventType the type of event we want the qual for + * @param offset the offset into this read for the qual + * @return a fractional weight for an error at this offset + */ + @Requires("validOffset(offset)") + @Ensures("result >= 0.0 && result <= 1.0") + public double getErrorFraction(final EventType eventType, final int offset) { + switch ( eventType ) { + case BASE_SUBSTITUTION: return snpErrors[offset]; + case BASE_INSERTION: return insertionErrors[offset]; + case BASE_DELETION: return deletionErrors[offset]; + default: throw new IllegalStateException("Unknown event type " + eventType); + } + } + + /** + * Get the read involved in this recalibration info + * @return a non-null GATKSAMRecord + */ + @Ensures("result != null") + public GATKSAMRecord getRead() { + return read; + } + + /** + * Should offset in this read be skipped (because it's covered by a known variation site?) + * @param offset a valid offset into this info + * @return true if offset should be skipped, false otherwise + */ + @Requires("validOffset(offset)") + public boolean skip(final int offset) { + return skips[offset]; + } + + /** + * Get the ReadCovariates object carrying the mapping from offsets -> covariate key sets + * @return a non-null ReadCovariates object + */ + @Ensures("result != null") + public ReadCovariates getCovariatesValues() { + return covariates; + } + + /** + * Ensures an offset is valid. Used in contracts + * @param offset a proposed offset + * @return true if offset is valid w.r.t. the data in this object, false otherwise + */ + private boolean validOffset(final int offset) { + return offset >= 0 && offset < baseQuals.length; + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java index 35375eb1d..5c002b7e5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; +import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -29,10 +30,31 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; * OTHER DEALINGS IN THE SOFTWARE. */ public interface RecalibrationEngine { - + /** + * Initialize the recalibration engine + * + * Called once before any calls to updateDataForRead are made. The engine should prepare itself + * to handle any number of updateDataForRead calls containing ReadRecalibrationInfo containing + * keys for each of the covariates provided. + * + * The engine should collect match and mismatch data into the recalibrationTables data. + * + * @param covariates an array of the covariates we'll be using in this engine, order matters + * @param recalibrationTables the destination recalibrationTables where stats should be collected + */ public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables); - public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors); + /** + * Update the recalibration statistics using the information in recalInfo + * @param recalInfo data structure holding information about the recalibration values for a single read + */ + @Requires("recalInfo != null") + public void updateDataForRead(final ReadRecalibrationInfo recalInfo); + /** + * Finalize, if appropriate, all derived data in recalibrationTables. + * + * Called once after all calls to updateDataForRead have been issued. + */ public void finalizeData(); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java index 1e166dfd0..a6ab98e8b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java @@ -35,34 +35,37 @@ import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; public class StandardRecalibrationEngine implements RecalibrationEngine, PublicPackageSource { - protected Covariate[] covariates; protected RecalibrationTables recalibrationTables; + @Override public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) { + if ( covariates == null ) throw new IllegalArgumentException("Covariates cannot be null"); + if ( recalibrationTables == null ) throw new IllegalArgumentException("recalibrationTables cannot be null"); + this.covariates = covariates.clone(); this.recalibrationTables = recalibrationTables; } @Override - public void updateDataForRead( final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) { + public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) { + final GATKSAMRecord read = recalInfo.getRead(); + final EventType eventType = EventType.BASE_SUBSTITUTION; + final ReadCovariates readCovariates = recalInfo.getCovariatesValues(); + for( int offset = 0; offset < read.getReadBases().length; offset++ ) { - if( !skip[offset] ) { - final ReadCovariates readCovariates = covariateKeySetFrom(read); + if( ! recalInfo.skip(offset) ) { + final byte qual = recalInfo.getQual(eventType, offset); + final double isError = recalInfo.getErrorFraction(eventType, offset); + final int[] keys = readCovariates.getKeySet(offset, eventType); - final byte qual = read.getBaseQualities()[offset]; - final double isError = snpErrors[offset]; - - final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION); - final int eventIndex = EventType.BASE_SUBSTITUTION.index; - - incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex); + incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventType.index); for (int i = 2; i < covariates.length; i++) { if (keys[i] < 0) continue; - incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex); + incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventType.index); } } } @@ -79,16 +82,6 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP return new RecalDatum(1, isError, reportedQual); } - /** - * Get the covariate key set from a read - * - * @param read the read - * @return the covariate keysets for this read - */ - protected ReadCovariates covariateKeySetFrom(GATKSAMRecord read) { - return (ReadCovariates) read.getTemporaryAttribute(BaseRecalibrator.COVARS_ATTRIBUTE); - } - /** * Create derived recalibration data tables * @@ -129,7 +122,10 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP * @param isError error value for this event * @param keys location in table of our item */ - protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray table, final byte qual, final double isError, final int... keys ) { + protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray table, + final byte qual, + final double isError, + final int... keys ) { final RecalDatum existingDatum = table.get(keys); if ( existingDatum == null ) { diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index a83aca8f3..ebb3c1ad0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -56,6 +56,12 @@ public class GATKSAMRecord extends BAMRecord { public static final String BQSR_BASE_INSERTION_QUALITIES = "BI"; // base qualities for insertions public static final String BQSR_BASE_DELETION_QUALITIES = "BD"; // base qualities for deletions + /** + * The default quality score for an insertion or deletion, if + * none are provided for this read. + */ + public static final byte DEFAULT_INSERTION_DELETION_QUAL = (byte)45; + // the SAMRecord data we're caching private String mReadString = null; private GATKSAMReadGroupRecord mReadGroup = null; @@ -239,7 +245,7 @@ public class GATKSAMRecord extends BAMRecord { byte [] quals = getExistingBaseInsertionQualities(); if( quals == null ) { quals = new byte[getBaseQualities().length]; - Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will + Arrays.fill(quals, DEFAULT_INSERTION_DELETION_QUAL); // Some day in the future when base insertion and base deletion quals exist the samtools API will // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45 } return quals; @@ -255,7 +261,7 @@ public class GATKSAMRecord extends BAMRecord { byte[] quals = getExistingBaseDeletionQualities(); if( quals == null ) { quals = new byte[getBaseQualities().length]; - Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will + Arrays.fill(quals, DEFAULT_INSERTION_DELETION_QUAL); // Some day in the future when base insertion and base deletion quals exist the samtools API will // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45 } return quals;