From 4895fe2289aecb0f869434a91b352b0536e1f98f Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 15 Jun 2012 02:32:00 -0400 Subject: [PATCH] No more extraneous array creation in BQSR covariate classes; now covariates push their data directly to the ReadCovariates class as it's calculated (no more going through CovariateValues.java) --- .../gatk/walkers/bqsr/ContextCovariate.java | 116 +++++++----------- .../sting/gatk/walkers/bqsr/Covariate.java | 6 +- .../gatk/walkers/bqsr/CovariateValues.java | 37 ------ .../gatk/walkers/bqsr/CycleCovariate.java | 42 ++++--- .../walkers/bqsr/QualityScoreCovariate.java | 23 +--- .../gatk/walkers/bqsr/ReadCovariates.java | 21 ++-- .../gatk/walkers/bqsr/ReadGroupCovariate.java | 11 +- .../gatk/walkers/bqsr/RecalDataManager.java | 6 +- .../recalibration/BaseRecalibration.java | 4 +- .../walkers/bqsr/BQSRKeyManagerUnitTest.java | 22 ++-- .../bqsr/ContextCovariateUnitTest.java | 14 ++- .../walkers/bqsr/CycleCovariateUnitTest.java | 21 ++-- .../bqsr/ReadGroupCovariateUnitTest.java | 11 +- 13 files changed, 134 insertions(+), 200 deletions(-) delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java index 8754fc6e1..0efca66c0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java @@ -32,8 +32,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import java.util.Arrays; - /** * Created by IntelliJ IDEA. * User: rpoplin @@ -54,20 +52,21 @@ public class ContextCovariate implements StandardCovariate { mismatchesContextSize = RAC.MISMATCHES_CONTEXT_SIZE; insertionsContextSize = RAC.INSERTIONS_CONTEXT_SIZE; deletionsContextSize = RAC.DELETIONS_CONTEXT_SIZE; + if (mismatchesContextSize > MAX_DNA_CONTEXT) + throw new UserException.BadArgumentValue("mismatches_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, mismatchesContextSize)); + if (insertionsContextSize > MAX_DNA_CONTEXT) + throw new UserException.BadArgumentValue("insertions_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, insertionsContextSize)); + if (deletionsContextSize > MAX_DNA_CONTEXT) + throw new UserException.BadArgumentValue("deletions_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, deletionsContextSize)); LOW_QUAL_TAIL = RAC.LOW_QUAL_TAIL; if (mismatchesContextSize <= 0 || insertionsContextSize <= 0 || deletionsContextSize <= 0) throw new UserException(String.format("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Insertions: %d Deletions:%d", mismatchesContextSize, insertionsContextSize, deletionsContextSize)); - } @Override - public CovariateValues getValues(final GATKSAMRecord read) { - final int l = read.getReadLength(); - final long[] mismatches = new long[l]; - final long[] insertions = new long[l]; - final long[] deletions = new long[l]; + public void recordValues(final GATKSAMRecord read, final ReadCovariates values) { final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context @@ -76,18 +75,10 @@ public class ContextCovariate implements StandardCovariate { if (negativeStrand) bases = BaseUtils.simpleReverseComplement(bases); - for (int i = 0; i < clippedRead.getReadLength(); i++) { - mismatches[i] = contextWith(bases, i, mismatchesContextSize); - insertions[i] = contextWith(bases, i, insertionsContextSize); - deletions[i] = contextWith(bases, i, deletionsContextSize); + final int readLength = clippedRead.getReadLength(); + for (int i = 0; i < readLength; i++) { + values.addCovariate(contextWith(bases, i, mismatchesContextSize), contextWith(bases, i, insertionsContextSize), contextWith(bases, i, deletionsContextSize), (negativeStrand ? readLength - i - 1 : i)); } - - if (negativeStrand) { - reverse(mismatches); - reverse(insertions); - reverse(deletions); - } - return new CovariateValues(mismatches, insertions, deletions); } // Used to get the covariate's value from input csv file during on-the-fly recalibration @@ -106,7 +97,7 @@ public class ContextCovariate implements StandardCovariate { @Override public long longFromKey(Object key) { - return longFrom((String) key); + return keyFromContext((String) key); } @Override @@ -123,35 +114,17 @@ public class ContextCovariate implements StandardCovariate { * @return the key representing the context */ private long contextWith(final byte[] bases, final int offset, final int contextSize) { - long result = -1; final int start = offset - contextSize + 1; - if (start >= 0) { - final byte[] context = Arrays.copyOfRange(bases, start, offset + 1); - if (!BaseUtils.containsBase(context, BaseUtils.N)) - result = keyFromContext(context); - } + final long result; + if (start >= 0) + result = keyFromContext(bases, start, offset + 1); + else + result = -1L; return result; } - /** - * Reverses the given array in place. - * - * @param array any array - */ - private static void reverse(final long[] array) { - final int arrayLength = array.length; - for (int l = 0, r = arrayLength - 1; l < r; l++, r--) { - final long temp = array[l]; - array[l] = array[r]; - array[r] = temp; - } - } - - static final private int MAX_DNA_CONTEXT = 31; // the maximum context size (number of bases) permitted in the "long bitset" implementation of the DNA <=> BitSet conversion. - static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1]; // keeps the memoized table with the number of combinations for each given DNA context length - - public static long longFrom(final String dna) { - return keyFromContext(dna.getBytes()); + public static long keyFromContext(final String dna) { + return keyFromContext(dna.getBytes(), 0, dna.length()); } /** @@ -172,39 +145,38 @@ public class ContextCovariate implements StandardCovariate { * @param dna the dna sequence * @return the key representing the dna sequence */ - public static long keyFromContext(final byte[] dna) { - if (dna.length > MAX_DNA_CONTEXT) - throw new ReviewedStingException(String.format("DNA Length cannot be bigger than %d. dna: %s (%d)", MAX_DNA_CONTEXT, dna, dna.length)); - - final long preContext = combinationsFor(dna.length - 1); // the sum of all combinations that preceded the length of the dna string - long baseTen = 0L; // the number in base_10 that we are going to use to generate the bit set - for (final byte base : dna) { - baseTen = baseTen << 2; // multiply by 4 - baseTen += (long)BaseUtils.simpleBaseToBaseIndex(base); + public static long keyFromContext(final byte[] dna, final int start, final int end) { + final long preContext = combinationsPerLength[end - start - 1]; // the sum of all combinations that preceded the length of the dna string + long baseTen = 0L; // the number in base_10 that we are going to use to generate the bit set + for (int i = start; i < end; i++) { + baseTen = (baseTen << 2); // multiply by 4 + final int baseIndex = BaseUtils.simpleBaseToBaseIndex(dna[i]); + if (baseIndex == -1) // ignore non-ACGT bases + return -1L; + baseTen += (long)baseIndex; } return baseTen + preContext; // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length. } + static final private int MAX_DNA_CONTEXT = 31; // the maximum context size (number of bases) permitted in the "long bitset" implementation of the DNA <=> BitSet conversion. + static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1]; // keeps the memoized table with the number of combinations for each given DNA context length + static { + for (int i = 0; i < MAX_DNA_CONTEXT + 1; i++) + computeCombinationsFor(i); + } + /** * The sum of all combinations of a context of a given length from length = 0 to length. * * Memoized implementation of sum(4^i) , where i=[0,length] * * @param length the length of the DNA context - * @return the sum of all combinations leading up to this context length. */ - private static long combinationsFor(int length) { - if (length > MAX_DNA_CONTEXT) - throw new ReviewedStingException(String.format("Context cannot be longer than %d bases but requested %d.", MAX_DNA_CONTEXT, length)); - - // only calculate the number of combinations if the table hasn't already cached the value - if (length > 0 && combinationsPerLength[length] == 0) { - long combinations = 0L; - for (int i = 1; i <= length; i++) - combinations += (1L << 2 * i); // add all combinations with 4^i ( 4^i is the same as 2^(2*i) ) - combinationsPerLength[length] = combinations; - } - return combinationsPerLength[length]; + private static void computeCombinationsFor(final int length) { + long combinations = 0L; + for (int i = 1; i <= length; i++) + combinations += (1L << 2 * i); // add all combinations with 4^i ( 4^i is the same as 2^(2*i) ) + combinationsPerLength[length] = combinations; } /** @@ -232,7 +204,7 @@ public class ContextCovariate implements StandardCovariate { throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?"); final int length = contextLengthFor(key); // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls) - key -= combinationsFor(length - 1); // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation + key -= combinationsPerLength[length - 1]; // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation StringBuilder dna = new StringBuilder(); while (key > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical) @@ -259,11 +231,11 @@ public class ContextCovariate implements StandardCovariate { * @return the length of the DNA context represented by this number */ private static int contextLengthFor(final long number) { - int length = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet. - long combinations = combinationsFor(length); // the next context (we advance it so we know which one was preceding it). - while (combinations <= number) { // find the length of the dna string (length) + int length = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet. + long combinations = combinationsPerLength[length]; // the next context (we advance it so we know which one was preceding it). + while (combinations <= number) { // find the length of the dna string (length) length++; - combinations = combinationsFor(length); // calculate the next context + combinations = combinationsPerLength[length]; // calculate the next context } return length; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java index 952d09520..ff86220b8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java @@ -48,10 +48,10 @@ public interface Covariate { /** * Calculates covariate values for all positions in the read. * - * @param read the read to calculate the covariates on. - * @return all the covariate values for every base in the read. + * @param read the read to calculate the covariates on. + * @param values the object to record the covariate values for every base in the read. */ - public CovariateValues getValues(final GATKSAMRecord read); + public void recordValues(final GATKSAMRecord read, final ReadCovariates values); /** * Used to get the covariate's value from input csv file during on-the-fly recalibration diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java deleted file mode 100644 index 59e9b8131..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java +++ /dev/null @@ -1,37 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; - -/** - * An object to hold the different covariate values for all bases in the read. - * - * Currently we have three different covariates for each read: - * - Mismatch - * - Insertion - * - Deletion - * - * @author Mauricio Carneiro - * @since 2/8/12 - */ -public class CovariateValues { - private final long[] mismatches; - private final long[] insertions; - private final long[] deletions; - - public CovariateValues(final long[] mismatch, final long[] insertion, final long[] deletion) { - this.mismatches = mismatch; - this.insertions = insertion; - this.deletions = deletion; - } - - public long[] getMismatches() { - return mismatches; - } - - public long[] getInsertions() { - return insertions; - } - - public long[] getDeletions() { - return deletions; - } - -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java index 6e3d7327f..3e91ca539 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java @@ -58,9 +58,8 @@ public class CycleCovariate implements StandardCovariate { // Used to pick out the covariate's value from attributes of the read @Override - public CovariateValues getValues(final GATKSAMRecord read) { + public void recordValues(final GATKSAMRecord read, final ReadCovariates values) { final int readLength = read.getReadLength(); - final long[] cycles = new long[readLength]; final NGSPlatform ngsPlatform = read.getNGSPlatform(); // Discrete cycle platforms @@ -79,12 +78,11 @@ public class CycleCovariate implements StandardCovariate { final int CUSHION = 4; final int MAX_CYCLE = readLength - CUSHION - 1; - for (int i = 0; i < MAX_CYCLE; i++) { - cycles[i] = (iMAX_CYCLE) ? -1L : keyFromCycle(cycle); + for (int i = 0; i < readLength; i++) { + final long key = (iMAX_CYCLE) ? -1L : keyFromCycle(cycle); + values.addCovariate(key, key, key, i); cycle += increment; } - for (int i = MAX_CYCLE; i < readLength; i++) - cycles[i] = -1L; } // Flow cycle platforms @@ -108,19 +106,23 @@ public class CycleCovariate implements StandardCovariate { int iii = 0; while (iii < readLength) { while (iii < readLength && bases[iii] == (byte) 'T') { - cycles[iii] = keyFromCycle(cycle); + final long key = keyFromCycle(cycle); + values.addCovariate(key, key, key, iii); iii++; } while (iii < readLength && bases[iii] == (byte) 'A') { - cycles[iii] = keyFromCycle(cycle); + final long key = keyFromCycle(cycle); + values.addCovariate(key, key, key, iii); iii++; } while (iii < readLength && bases[iii] == (byte) 'C') { - cycles[iii] = keyFromCycle(cycle); + final long key = keyFromCycle(cycle); + values.addCovariate(key, key, key, iii); iii++; } while (iii < readLength && bases[iii] == (byte) 'G') { - cycles[iii] = keyFromCycle(cycle); + final long key = keyFromCycle(cycle); + values.addCovariate(key, key, key, iii); iii++; } if (iii < readLength) { @@ -130,7 +132,8 @@ public class CycleCovariate implements StandardCovariate { cycle++; } if (iii < readLength && !BaseUtils.isRegularBase(bases[iii])) { - cycles[iii] = keyFromCycle(cycle); + final long key = keyFromCycle(cycle); + values.addCovariate(key, key, key, iii); iii++; } @@ -140,19 +143,23 @@ public class CycleCovariate implements StandardCovariate { int iii = readLength - 1; while (iii >= 0) { while (iii >= 0 && bases[iii] == (byte) 'T') { - cycles[iii] = keyFromCycle(cycle); + final long key = keyFromCycle(cycle); + values.addCovariate(key, key, key, iii); iii--; } while (iii >= 0 && bases[iii] == (byte) 'A') { - cycles[iii] = keyFromCycle(cycle); + final long key = keyFromCycle(cycle); + values.addCovariate(key, key, key, iii); iii--; } while (iii >= 0 && bases[iii] == (byte) 'C') { - cycles[iii] = keyFromCycle(cycle); + final long key = keyFromCycle(cycle); + values.addCovariate(key, key, key, iii); iii--; } while (iii >= 0 && bases[iii] == (byte) 'G') { - cycles[iii] = keyFromCycle(cycle); + final long key = keyFromCycle(cycle); + values.addCovariate(key, key, key, iii); iii--; } if (iii >= 0) { @@ -162,7 +169,8 @@ public class CycleCovariate implements StandardCovariate { cycle++; } if (iii >= 0 && !BaseUtils.isRegularBase(bases[iii])) { - cycles[iii] = keyFromCycle(cycle); + final long key = keyFromCycle(cycle); + values.addCovariate(key, key, key, iii); iii--; } } @@ -173,8 +181,6 @@ public class CycleCovariate implements StandardCovariate { else { throw new UserException("The platform (" + read.getReadGroup().getPlatform() + ") associated with read group " + read.getReadGroup() + " is not a recognized platform. Implemented options are e.g. illumina, 454, and solid"); } - - return new CovariateValues(cycles, cycles, cycles); } // Used to get the covariate's value from input csv file during on-the-fly recalibration diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java index e98dfbea9..c60ca38e1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java @@ -40,28 +40,17 @@ public class QualityScoreCovariate implements RequiredCovariate { // Initialize any member variables using the command-line arguments passed to the walkers @Override - public void initialize(final RecalibrationArgumentCollection RAC) { - } + public void initialize(final RecalibrationArgumentCollection RAC) {} @Override - public CovariateValues getValues(final GATKSAMRecord read) { - int readLength = read.getReadLength(); - - long[] mismatches = new long[readLength]; - long[] insertions = new long[readLength]; - long[] deletions = new long[readLength]; - - byte[] baseQualities = read.getBaseQualities(); - byte[] baseInsertionQualities = read.getBaseInsertionQualities(); - byte[] baseDeletionQualities = read.getBaseDeletionQualities(); + public void recordValues(final GATKSAMRecord read, final ReadCovariates values) { + final byte[] baseQualities = read.getBaseQualities(); + final byte[] baseInsertionQualities = read.getBaseInsertionQualities(); + final byte[] baseDeletionQualities = read.getBaseDeletionQualities(); for (int i = 0; i < baseQualities.length; i++) { - mismatches[i] = (long)baseQualities[i]; - insertions[i] = (long)baseInsertionQualities[i]; - deletions[i] = (long)baseDeletionQualities[i]; + values.addCovariate((long)baseQualities[i], (long)baseInsertionQualities[i], (long)baseDeletionQualities[i], i); } - - return new CovariateValues(mismatches, insertions, deletions); } // Used to get the covariate's value from input csv file during on-the-fly recalibration diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java index c5a7a3a5c..c9043dc04 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java @@ -15,24 +15,22 @@ public class ReadCovariates { private final long[][] insertionsKeySet; private final long[][] deletionsKeySet; - private int nextCovariateIndex; + private int currentCovariateIndex = 0; public ReadCovariates(int readLength, int numberOfCovariates) { this.mismatchesKeySet = new long[readLength][numberOfCovariates]; this.insertionsKeySet = new long[readLength][numberOfCovariates]; this.deletionsKeySet = new long[readLength][numberOfCovariates]; - this.nextCovariateIndex = 0; } - public void reset() { - nextCovariateIndex = 0; + public void setCovariateIndex(final int index) { + currentCovariateIndex = index; } - public void addCovariate(CovariateValues covariate) { - transposeCovariateValues(mismatchesKeySet, covariate.getMismatches()); - transposeCovariateValues(insertionsKeySet, covariate.getInsertions()); - transposeCovariateValues(deletionsKeySet, covariate.getDeletions()); - nextCovariateIndex++; + public void addCovariate(final long mismatch, final long insertion, final long deletion, final int readOffset) { + mismatchesKeySet[readOffset][currentCovariateIndex] = mismatch; + insertionsKeySet[readOffset][currentCovariateIndex] = insertion; + deletionsKeySet[readOffset][currentCovariateIndex] = deletion; } public long[] getKeySet(final int readPosition, final EventType errorModel) { @@ -60,11 +58,6 @@ public class ReadCovariates { return deletionsKeySet[readPosition]; } - private void transposeCovariateValues(final long[][] keySet, final long[] covariateValues) { - for (int i = 0; i < covariateValues.length; i++) - keySet[i][nextCovariateIndex] = covariateValues[i]; - } - /** * Testing routines */ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java index a3d225473..ae0ef38cc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java @@ -3,7 +3,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import java.util.Arrays; import java.util.HashMap; /* @@ -50,13 +49,13 @@ public class ReadGroupCovariate implements RequiredCovariate { public void initialize(final RecalibrationArgumentCollection RAC) {} @Override - public CovariateValues getValues(final GATKSAMRecord read) { - final int l = read.getReadLength(); + public void recordValues(final GATKSAMRecord read, final ReadCovariates values) { final String readGroupId = readGroupValueFromRG(read.getReadGroup()); final long key = keyForReadGroup(readGroupId); - long[] readGroups = new long[l]; - Arrays.fill(readGroups, key); - return new CovariateValues(readGroups, readGroups, readGroups); + + final int l = read.getReadLength(); + for (int i = 0; i < l; i++) + values.addCovariate(key, key, key, i); } @Override diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java index 63ea2d677..1356ffa94 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java @@ -631,8 +631,10 @@ public class RecalDataManager { */ public static void computeCovariates(final GATKSAMRecord read, final Covariate[] requestedCovariates, final ReadCovariates readCovariates) { // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read - for (final Covariate covariate : requestedCovariates) - readCovariates.addCovariate(covariate.getValues(read)); + for (int i = 0; i < requestedCovariates.length; i++) { + readCovariates.setCovariateIndex(i); + requestedCovariates[i].recordValues(read, readCovariates); + } } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index 2f292dbf8..d7598a826 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.recalibration; import org.broadinstitute.sting.gatk.walkers.bqsr.*; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -92,7 +93,6 @@ public class BaseRecalibration { * @param read the read to recalibrate */ public void recalibrateRead(final GATKSAMRecord read) { - readCovariates.reset(); RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings final byte[] quals = read.getBaseQualities(errorModel); @@ -181,7 +181,7 @@ public class BaseRecalibration { } double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula - recalibratedQual = QualityUtils.boundQual((int) Math.round(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL + recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManagerUnitTest.java index ea2b7c499..da1678d54 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManagerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManagerUnitTest.java @@ -80,16 +80,22 @@ public class BQSRKeyManagerUnitTest { } private void runTestOnRead(GATKSAMRecord read, List covariateList, int nRequired) { - final long[][][] covariateKeys = new long[covariateList.size()][EventType.values().length][]; - int i = 0; - for (Covariate cov : covariateList) { + final long[][][] covariateKeys = new long[covariateList.size()][EventType.values().length][read.getReadLength()]; + ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), covariateList.size()); + for (int i = 0; i < covariateList.size(); i++) { + final Covariate cov = covariateList.get(i); cov.initialize(RAC); - CovariateValues covValues = cov.getValues(read); - covariateKeys[i][EventType.BASE_SUBSTITUTION.index] = covValues.getMismatches(); - covariateKeys[i][EventType.BASE_INSERTION.index] = covValues.getInsertions(); - covariateKeys[i][EventType.BASE_DELETION.index] = covValues.getDeletions(); - i++; + readCovariates.setCovariateIndex(i); + cov.recordValues(read, readCovariates); } + for (int i = 0; i < read.getReadLength(); i++) { + for (EventType eventType : EventType.values()) { + final long[] vals = readCovariates.getKeySet(i, eventType); + for (int j = 0; j < vals.length; j++) + covariateKeys[j][eventType.index][i] = vals[j]; + } + } + List requiredCovariates = new LinkedList(); List optionalCovariates = new LinkedList(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java index f9392cd74..e73d40603 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java @@ -28,15 +28,17 @@ public class ContextCovariateUnitTest { public void testSimpleContexts() { GATKSAMRecord read = ReadUtils.createRandomRead(1000); GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, RAC.LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); - CovariateValues values = covariate.getValues(read); - verifyCovariateArray(values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, clippedRead, covariate); - verifyCovariateArray(values.getInsertions(), RAC.INSERTIONS_CONTEXT_SIZE, clippedRead, covariate); - verifyCovariateArray(values.getDeletions(), RAC.DELETIONS_CONTEXT_SIZE, clippedRead, covariate); + ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1); + covariate.recordValues(read, readCovariates); + + verifyCovariateArray(readCovariates.getMismatchesKeySet(), RAC.MISMATCHES_CONTEXT_SIZE, clippedRead, covariate); + verifyCovariateArray(readCovariates.getInsertionsKeySet(), RAC.INSERTIONS_CONTEXT_SIZE, clippedRead, covariate); + verifyCovariateArray(readCovariates.getDeletionsKeySet(), RAC.DELETIONS_CONTEXT_SIZE, clippedRead, covariate); } - public static void verifyCovariateArray(long[] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) { + public static void verifyCovariateArray(long[][] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) { for (int i = 0; i < values.length; i++) - Assert.assertEquals(contextCovariate.formatKey(values[i]), expectedContext(read, i, contextSize)); + Assert.assertEquals(contextCovariate.formatKey(values[i][0]), expectedContext(read, i, contextSize)); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java index a4a6c9afa..79b57fd8f 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java @@ -30,25 +30,26 @@ public class CycleCovariateUnitTest { read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID")); read.getReadGroup().setPlatform("illumina"); - CovariateValues values = covariate.getValues(read); - verifyCovariateArray(values.getMismatches(), 1, (short) 1); + ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1); + covariate.recordValues(read, readCovariates); + verifyCovariateArray(readCovariates.getMismatchesKeySet(), 1, (short) 1); read.setReadNegativeStrandFlag(true); - values = covariate.getValues(read); - verifyCovariateArray(values.getMismatches(), readLength, -1); + covariate.recordValues(read, readCovariates); + verifyCovariateArray(readCovariates.getMismatchesKeySet(), readLength, -1); read.setSecondOfPairFlag(true); - values = covariate.getValues(read); - verifyCovariateArray(values.getMismatches(), -readLength, 1); + covariate.recordValues(read, readCovariates); + verifyCovariateArray(readCovariates.getMismatchesKeySet(), -readLength, 1); read.setReadNegativeStrandFlag(false); - values = covariate.getValues(read); - verifyCovariateArray(values.getMismatches(), -1, -1); + covariate.recordValues(read, readCovariates); + verifyCovariateArray(readCovariates.getMismatchesKeySet(), -1, -1); } - private void verifyCovariateArray(long[] values, int init, int increment) { + private void verifyCovariateArray(long[][] values, int init, int increment) { for (short i = 0; i < values.length; i++) { - short actual = Short.decode(covariate.formatKey(values[i])); + short actual = Short.decode(covariate.formatKey(values[i][0])); int expected = init + (increment * i); // System.out.println(String.format("%d: %d, %d", i, actual, expected)); Assert.assertEquals(actual, expected); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java index 0f910130b..4970413e8 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java @@ -40,14 +40,15 @@ public class ReadGroupCovariateUnitTest { private void runTest(GATKSAMReadGroupRecord rg, String expected) { GATKSAMRecord read = ReadUtils.createRandomRead(10); read.setReadGroup(rg); - CovariateValues values = covariate.getValues(read); - verifyCovariateArray(values.getMismatches(), expected); + ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1); + covariate.recordValues(read, readCovariates); + verifyCovariateArray(readCovariates.getMismatchesKeySet(), expected); } - private void verifyCovariateArray(long[] values, String expected) { - for (Long value : values) { - String actual = covariate.formatKey(value); + private void verifyCovariateArray(long[][] values, String expected) { + for (long[] value : values) { + String actual = covariate.formatKey(value[0]); Assert.assertEquals(actual, expected); } }