From 29a74908bb531d06c484fabb8e97b5de264e64e9 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 14 Jun 2012 00:05:42 -0400 Subject: [PATCH] The next round of BQSR optimizations: no more Long[] array creation --- .../gatk/walkers/bqsr/BQSRKeyManager.java | 52 +++++++----------- .../gatk/walkers/bqsr/ContextCovariate.java | 28 +++++----- .../sting/gatk/walkers/bqsr/Covariate.java | 10 ++-- .../gatk/walkers/bqsr/CovariateValues.java | 14 ++--- .../gatk/walkers/bqsr/CycleCovariate.java | 11 ++-- .../walkers/bqsr/QualityScoreCovariate.java | 10 ++-- .../gatk/walkers/bqsr/ReadCovariates.java | 32 ++++++----- .../gatk/walkers/bqsr/ReadGroupCovariate.java | 8 +-- .../gatk/walkers/bqsr/RecalDataManager.java | 22 ++++++-- .../recalibration/BaseRecalibration.java | 36 +++++++------ .../walkers/bqsr/BQSRKeyManagerUnitTest.java | 53 +++++++++++-------- .../bqsr/RecalibrationReportUnitTest.java | 28 +++++----- .../BaseRecalibrationUnitTest.java | 20 ++++--- 13 files changed, 171 insertions(+), 153 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java index f0c7c080e..9efb5a86f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java @@ -92,49 +92,33 @@ public class BQSRKeyManager { } /** - * Generates one key per optional covariate. + * Generates one key given the optional covariate (or none if it is null) * * Keys include all required covariates, the standard covariate and the event type. * - * Example allKeys: - * RG, QUAL, CYCLE, CONTEXT - * - * List of BitSets returned by this example (given eventType): - * RG, QUAL, CYCLE, EVENT - * RG, QUAL, CONTEXT, EVENT - * - * Note: If there are no optional covariates, only one bitset key will be returned with all the required covariates and the event type - * - * @param allKeys The keys in long representation for each covariate - * @param eventType The type of event described by this keyset (e.g. mismatches, insertions, deletions) - * @return one key in long representation per covariate + * @param allKeys The keys in long representation for each covariate (includes all optional covariates, not just the one requested) + * @param eventType The type of event described by this keyset (e.g. mismatches, insertions, deletions) + * @return one key in long representation */ - public Long[] longsFromAllKeys(final Long[] allKeys, final EventType eventType) { - final ArrayList allFinalKeys = new ArrayList(); // Generate one key per optional covariate + public long createMasterKey(final long[] allKeys, final EventType eventType, final int optionalCovariateIndex) { - int covariateIndex = 0; + int keyIndex = 0; long masterKey = 0L; // This will be a master key holding all the required keys, to replicate later on for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo) - masterKey |= (allKeys[covariateIndex++] << infoRequired.offset); + masterKey |= (allKeys[keyIndex++] << infoRequired.offset); final long eventKey = keyFromEvent(eventType); // create a key for the event type masterKey |= (eventKey << nRequiredBits); - for (int i = 0; i < optionalCovariatesInfo.length; i++) { - final Long covariateKey = allKeys[covariateIndex++]; - if (covariateKey == null) - continue; // do not add nulls to the final set of keys. - - long newKey = masterKey | (covariateKey << optionalCovariateOffset); - newKey |= (optionalCovariatesInfo[i].covariateID << optionalCovariateIDOffset); - - allFinalKeys.add(newKey); // add this key to the list of keys + if (optionalCovariateIndex >= 0 && optionalCovariateIndex < optionalCovariates.length) { + final Long covariateKey = allKeys[keyIndex + optionalCovariateIndex]; + if (covariateKey != null) { // do not add nulls to the final set of keys + masterKey |= (covariateKey << optionalCovariateOffset); + masterKey |= (optionalCovariatesInfo[optionalCovariateIndex].covariateID << optionalCovariateIDOffset); + } } - if (optionalCovariatesInfo.length == 0) // special case when we have no optional covariates - allFinalKeys.add(masterKey); - - return allFinalKeys.toArray(new Long[allFinalKeys.size()]); + return masterKey; } /** @@ -248,14 +232,14 @@ public class BQSRKeyManager { } // cache the key representing an event since it's otherwise created a massive amount of times - private static final Map eventTypeCache = new HashMap(EventType.values().length); // event IDs must be longs so that bit-fiddling works + private static final long[] eventTypeCache = new long[EventType.values().length]; // event IDs must be longs so that bit-fiddling works static { for (final EventType eventType : EventType.values()) - eventTypeCache.put(eventType, (long)eventType.index); + eventTypeCache[eventType.index] = (long)eventType.index; } - private Long keyFromEvent(final EventType eventType) { - return eventTypeCache.get(eventType); + private long keyFromEvent(final EventType eventType) { + return eventTypeCache[eventType.index]; } @Override diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java index 76b84807d..0665ada77 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java @@ -65,9 +65,9 @@ public class ContextCovariate implements StandardCovariate { @Override public CovariateValues getValues(final GATKSAMRecord read) { final int l = read.getReadLength(); - final Long[] mismatches = new Long[l]; - final Long[] insertions = new Long[l]; - final Long[] deletions = new Long[l]; + final long[] mismatches = new long[l]; + final long[] insertions = new long[l]; + final long[] deletions = new long[l]; final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context @@ -97,15 +97,15 @@ public class ContextCovariate implements StandardCovariate { } @Override - public String formatKey(final Long key) { - if (key == null) // this can only happen in test routines because we do not propagate null keys to the csv file + public String formatKey(final long key) { + if (key == -1) // this can only happen in test routines because we do not propagate null keys to the csv file return null; return contextFromKey(key); } @Override - public Long longFromKey(Object key) { + public long longFromKey(Object key) { return longFrom((String) key); } @@ -122,8 +122,8 @@ public class ContextCovariate implements StandardCovariate { * @param contextSize context size to use building the context * @return the key representing the context */ - private Long contextWith(final byte[] bases, final int offset, final int contextSize) { - Long result = null; + private long contextWith(final byte[] bases, final int offset, final int contextSize) { + long result = -1; final int start = offset - contextSize + 1; if (start >= 0) { final byte[] context = Arrays.copyOfRange(bases, start, offset + 1); @@ -138,10 +138,10 @@ public class ContextCovariate implements StandardCovariate { * * @param array any array */ - private static void reverse(final Object[] array) { + private static void reverse(final long[] array) { final int arrayLength = array.length; for (int l = 0, r = arrayLength - 1; l < r; l++, r--) { - final Object temp = array[l]; + final long temp = array[l]; array[l] = array[r]; array[r] = temp; } @@ -150,12 +150,12 @@ public class ContextCovariate implements StandardCovariate { static final private int MAX_DNA_CONTEXT = 31; // the maximum context size (number of bases) permitted in the "long bitset" implementation of the DNA <=> BitSet conversion. static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1]; // keeps the memoized table with the number of combinations for each given DNA context length - public static Long longFrom(final String dna) { + public static long longFrom(final String dna) { return keyFromContext(dna.getBytes()); } /** - * Creates a Long representation of a given dna string. + * Creates a long representation of a given dna string. * * Warning: This conversion is limited to long precision, therefore the dna sequence cannot * be longer than 31 bases. @@ -172,7 +172,7 @@ public class ContextCovariate implements StandardCovariate { * @param dna the dna sequence * @return the key representing the dna sequence */ - public static Long keyFromContext(final byte[] dna) { + public static long keyFromContext(final byte[] dna) { if (dna.length > MAX_DNA_CONTEXT) throw new ReviewedStingException(String.format("DNA Length cannot be bigger than %d. dna: %s (%d)", MAX_DNA_CONTEXT, dna, dna.length)); @@ -227,7 +227,7 @@ public class ContextCovariate implements StandardCovariate { * @param key the key representing the dna sequence * @return the dna sequence represented by the key */ - public static String contextFromKey(Long key) { + public static String contextFromKey(long key) { if (key < 0) throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java index 814bbe507..952d09520 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java @@ -64,21 +64,21 @@ public interface Covariate { /** * Converts the internal representation of the key to String format for file output. * - * @param key the Long representation of the key + * @param key the long representation of the key * @return a string representation of the key */ - public String formatKey(final Long key); + public String formatKey(final long key); /** - * Converts an Object key into a Long key using only the lowest numberOfBits() bits + * Converts an Object key into a long key using only the lowest numberOfBits() bits * * Only necessary for on-the-fly recalibration when you have the object, but need to store it in memory in long format. For counting covariates - * the getValues method already returns all values in Long format. + * the getValues method already returns all values in long format. * * @param key the object corresponding to the covariate * @return a long representation of the object */ - public Long longFromKey(final Object key); + public long longFromKey(final Object key); /** * Each covariate should determine how many bits are necessary to encode it's data diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java index 960c51325..59e9b8131 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java @@ -12,25 +12,25 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; * @since 2/8/12 */ public class CovariateValues { - private final Long[] mismatches; - private final Long[] insertions; - private final Long[] deletions; + private final long[] mismatches; + private final long[] insertions; + private final long[] deletions; - public CovariateValues(Long[] mismatch, Long[] insertion, Long[] deletion) { + public CovariateValues(final long[] mismatch, final long[] insertion, final long[] deletion) { this.mismatches = mismatch; this.insertions = insertion; this.deletions = deletion; } - public Long[] getMismatches() { + public long[] getMismatches() { return mismatches; } - public Long[] getInsertions() { + public long[] getInsertions() { return insertions; } - public Long[] getDeletions() { + public long[] getDeletions() { return deletions; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java index a072c4931..3e2fa5869 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.BitSetUtils; import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -60,7 +59,7 @@ public class CycleCovariate implements StandardCovariate { // Used to pick out the covariate's value from attributes of the read @Override public CovariateValues getValues(final GATKSAMRecord read) { - Long[] cycles = new Long[read.getReadLength()]; + long[] cycles = new long[read.getReadLength()]; final NGSPlatform ngsPlatform = read.getNGSPlatform(); // Discrete cycle platforms @@ -80,7 +79,7 @@ public class CycleCovariate implements StandardCovariate { final int CUSHION = 4; final int MAX_CYCLE = read.getReadLength() - CUSHION - 1; for (int i = 0; i < MAX_CYCLE; i++) { - cycles[i] = (iMAX_CYCLE) ? null : keyFromCycle(cycle); + cycles[i] = (iMAX_CYCLE) ? -1 : keyFromCycle(cycle); cycle += increment; } } @@ -183,7 +182,7 @@ public class CycleCovariate implements StandardCovariate { } @Override - public String formatKey(final Long key) { + public String formatKey(final long key) { long cycle = key >> 1; // shift so we can remove the "sign" bit if ( (key & 1) != 0 ) // is the last bit set? cycle *= -1; // then the cycle is negative @@ -191,7 +190,7 @@ public class CycleCovariate implements StandardCovariate { } @Override - public Long longFromKey(final Object key) { + public long longFromKey(final Object key) { return (key instanceof String) ? keyFromCycle(Short.parseShort((String) key)) : keyFromCycle((Short) key); } @@ -200,7 +199,7 @@ public class CycleCovariate implements StandardCovariate { return BQSRKeyManager.numberOfBitsToRepresent(2 * Short.MAX_VALUE); // positive and negative } - private static Long keyFromCycle(final short cycle) { + private static long keyFromCycle(final short cycle) { // no negative values because long result = Math.abs(cycle); result = result << 1; // shift so we can add the "sign" bit diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java index b3fcc024c..e98dfbea9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java @@ -47,9 +47,9 @@ public class QualityScoreCovariate implements RequiredCovariate { public CovariateValues getValues(final GATKSAMRecord read) { int readLength = read.getReadLength(); - Long[] mismatches = new Long[readLength]; - Long[] insertions = new Long[readLength]; - Long[] deletions = new Long[readLength]; + long[] mismatches = new long[readLength]; + long[] insertions = new long[readLength]; + long[] deletions = new long[readLength]; byte[] baseQualities = read.getBaseQualities(); byte[] baseInsertionQualities = read.getBaseInsertionQualities(); @@ -71,12 +71,12 @@ public class QualityScoreCovariate implements RequiredCovariate { } @Override - public String formatKey(final Long key) { + public String formatKey(final long key) { return String.format("%d", key); } @Override - public Long longFromKey(final Object key) { + public long longFromKey(final Object key) { return (key instanceof String) ? (long)Byte.parseByte((String) key) : (long)(Byte) key; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java index b307aeb17..c5a7a3a5c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java @@ -11,19 +11,23 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; * @since 2/8/12 */ public class ReadCovariates { - private final Long[][] mismatchesKeySet; - private final Long[][] insertionsKeySet; - private final Long[][] deletionsKeySet; + private final long[][] mismatchesKeySet; + private final long[][] insertionsKeySet; + private final long[][] deletionsKeySet; private int nextCovariateIndex; public ReadCovariates(int readLength, int numberOfCovariates) { - this.mismatchesKeySet = new Long[readLength][numberOfCovariates]; - this.insertionsKeySet = new Long[readLength][numberOfCovariates]; - this.deletionsKeySet = new Long[readLength][numberOfCovariates]; + this.mismatchesKeySet = new long[readLength][numberOfCovariates]; + this.insertionsKeySet = new long[readLength][numberOfCovariates]; + this.deletionsKeySet = new long[readLength][numberOfCovariates]; this.nextCovariateIndex = 0; } + public void reset() { + nextCovariateIndex = 0; + } + public void addCovariate(CovariateValues covariate) { transposeCovariateValues(mismatchesKeySet, covariate.getMismatches()); transposeCovariateValues(insertionsKeySet, covariate.getInsertions()); @@ -31,7 +35,7 @@ public class ReadCovariates { nextCovariateIndex++; } - public Long[] getKeySet(final int readPosition, final EventType errorModel) { + public long[] getKeySet(final int readPosition, final EventType errorModel) { switch (errorModel) { case BASE_SUBSTITUTION: return getMismatchesKeySet(readPosition); @@ -44,19 +48,19 @@ public class ReadCovariates { } } - public Long[] getMismatchesKeySet(int readPosition) { + public long[] getMismatchesKeySet(final int readPosition) { return mismatchesKeySet[readPosition]; } - public Long[] getInsertionsKeySet(int readPosition) { + public long[] getInsertionsKeySet(final int readPosition) { return insertionsKeySet[readPosition]; } - public Long[] getDeletionsKeySet(int readPosition) { + public long[] getDeletionsKeySet(final int readPosition) { return deletionsKeySet[readPosition]; } - private void transposeCovariateValues(Long[][] keySet, Long[] covariateValues) { + private void transposeCovariateValues(final long[][] keySet, final long[] covariateValues) { for (int i = 0; i < covariateValues.length; i++) keySet[i][nextCovariateIndex] = covariateValues[i]; } @@ -64,15 +68,15 @@ public class ReadCovariates { /** * Testing routines */ - protected Long[][] getMismatchesKeySet() { + protected long[][] getMismatchesKeySet() { return mismatchesKeySet; } - protected Long[][] getInsertionsKeySet() { + protected long[][] getInsertionsKeySet() { return insertionsKeySet; } - protected Long[][] getDeletionsKeySet() { + protected long[][] getDeletionsKeySet() { return deletionsKeySet; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java index 0bba319c2..a3d225473 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java @@ -54,7 +54,7 @@ public class ReadGroupCovariate implements RequiredCovariate { final int l = read.getReadLength(); final String readGroupId = readGroupValueFromRG(read.getReadGroup()); final long key = keyForReadGroup(readGroupId); - Long[] readGroups = new Long[l]; + long[] readGroups = new long[l]; Arrays.fill(readGroups, key); return new CovariateValues(readGroups, readGroups, readGroups); } @@ -65,12 +65,12 @@ public class ReadGroupCovariate implements RequiredCovariate { } @Override - public String formatKey(final Long key) { + public String formatKey(final long key) { return readGroupReverseLookupTable.get(key); } @Override - public Long longFromKey(Object key) { + public long longFromKey(Object key) { return keyForReadGroup((String) key); } @@ -79,7 +79,7 @@ public class ReadGroupCovariate implements RequiredCovariate { return BQSRKeyManager.numberOfBitsToRepresent(Short.MAX_VALUE); } - private Long keyForReadGroup(final String readGroupId) { + private long keyForReadGroup(final String readGroupId) { if (!readGroupLookupTable.containsKey(readGroupId)) { readGroupLookupTable.put(readGroupId, nextId); readGroupReverseLookupTable.put(nextId, readGroupId); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java index bf4f223e4..98cf587de 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java @@ -612,15 +612,27 @@ public class RecalDataManager { * @return a matrix with all the covariates calculated for every base in the read */ public static ReadCovariates computeCovariates(final GATKSAMRecord read, final Covariate[] requestedCovariates) { - final int numRequestedCovariates = requestedCovariates.length; - final int readLength = read.getReadLength(); - final ReadCovariates readCovariates = new ReadCovariates(readLength, numRequestedCovariates); + final ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), requestedCovariates.length); + computeCovariates(read, requestedCovariates, readCovariates); + return readCovariates; + } + /** + * Computes all requested covariates for every offset in the given read + * by calling covariate.getValues(..). + * + * It populates an array of covariate values where result[i][j] is the covariate + * value for the ith position in the read and the jth covariate in + * reqeustedCovariates list. + * + * @param read The read for which to compute covariate values. + * @param requestedCovariates The list of requested covariates. + * @param readCovariates The object to store the covariate values + */ + public static void computeCovariates(final GATKSAMRecord read, final Covariate[] requestedCovariates, final ReadCovariates readCovariates) { // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read for (final Covariate covariate : requestedCovariates) readCovariates.addCovariate(covariate.getValues(read)); - - return readCovariates; } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index a39a2c42b..da5558a9e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -42,7 +42,9 @@ import java.util.*; public class BaseRecalibration { private final static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code"; - private final static String TOO_MANY_KEYS_EXCEPTION = "There should only be one key for the RG collapsed table, something went wrong here"; + private final static int MAXIMUM_RECALIBRATED_READ_LENGTH = 5000; + private final ReadCovariates readCovariates; + private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done) private final LinkedHashMap> keysAndTablesMap; // quick access reference to the read group table and its key manager @@ -64,6 +66,8 @@ public class BaseRecalibration { quantizationInfo.noQuantization(); else if (quantizationLevels > 0 && quantizationLevels != quantizationInfo.getQuantizationLevels()) // any other positive value means, we want a different quantization than the one pre-calculated in the recalibration report. Negative values mean the user did not provide a quantization argument, and just wnats to use what's in the report. quantizationInfo.quantizeQualityScores(quantizationLevels); + + readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length); } /** @@ -77,6 +81,7 @@ public class BaseRecalibration { this.quantizationInfo = quantizationInfo; this.keysAndTablesMap = keysAndTablesMap; this.requestedCovariates = requestedCovariates; + readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length); } /** @@ -87,7 +92,8 @@ public class BaseRecalibration { * @param read the read to recalibrate */ public void recalibrateRead(final GATKSAMRecord read) { - final ReadCovariates readCovariates = RecalDataManager.computeCovariates(read, requestedCovariates); // compute all covariates for the read + readCovariates.reset(); + RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings final byte[] quals = read.getBaseQualities(errorModel); @@ -95,7 +101,7 @@ public class BaseRecalibration { final byte originalQualityScore = quals[offset]; if (originalQualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality) - final Long[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model + final long[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base quals[offset] = recalibratedQualityScore; } @@ -123,25 +129,23 @@ public class BaseRecalibration { * @param errorModel the event type * @return A recalibrated quality score as a byte */ - protected byte performSequentialQualityCalculation(final Long[] key, final EventType errorModel) { + protected byte performSequentialQualityCalculation(final long[] key, final EventType errorModel) { final byte qualFromRead = (byte)(long)key[1]; double globalDeltaQ = 0.0; double deltaQReported = 0.0; double deltaQCovariates = 0.0; + long masterKey; for (Map.Entry> mapEntry : keysAndTablesMap.entrySet()) { final BQSRKeyManager keyManager = mapEntry.getKey(); final Map table = mapEntry.getValue(); - final Long[] bitKeys = keyManager.longsFromAllKeys(key, errorModel); // calculate the shift in quality due to the read group switch(keyManager.getNumRequiredCovariates()) { case 1: // this is the ReadGroup table - if (bitKeys.length > 1) - throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION); - - final RecalDatum empiricalQualRG = table.get(bitKeys[0]); + masterKey = keyManager.createMasterKey(key, errorModel, -1); + final RecalDatum empiricalQualRG = table.get(masterKey); if (empiricalQualRG != null) { final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality(); final double aggregrateQReported = empiricalQualRG.getEstimatedQReported(); @@ -149,19 +153,19 @@ public class BaseRecalibration { } break; case 2: - if (keyManager.getNumOptionalCovariates() == 0) { // this is the QualityScore table - if (bitKeys.length > 1) - throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION); - - final RecalDatum empiricalQualQS = table.get(bitKeys[0]); + final int numOptionalCovariates = keyManager.getNumOptionalCovariates(); + if (numOptionalCovariates == 0) { // this is the QualityScore table + masterKey = keyManager.createMasterKey(key, errorModel, -1); + final RecalDatum empiricalQualQS = table.get(masterKey); if (empiricalQualQS != null) { final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality(); deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ; } } else { // this is the table with all the covariates - for (final Long k : bitKeys) { - final RecalDatum empiricalQualCO = table.get(k); + for (int i = 0; i < numOptionalCovariates; i++) { + masterKey = keyManager.createMasterKey(key, errorModel, i); + final RecalDatum empiricalQualCO = table.get(masterKey); if (empiricalQualCO != null) { final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality(); deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported)); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManagerUnitTest.java index 33ce22c66..ea2b7c499 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManagerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManagerUnitTest.java @@ -9,7 +9,6 @@ import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import java.util.ArrayList; -import java.util.BitSet; import java.util.LinkedList; import java.util.List; @@ -81,7 +80,7 @@ public class BQSRKeyManagerUnitTest { } private void runTestOnRead(GATKSAMRecord read, List covariateList, int nRequired) { - final Long[][][] covariateKeys = new Long[covariateList.size()][EventType.values().length][]; + final long[][][] covariateKeys = new long[covariateList.size()][EventType.values().length][]; int i = 0; for (Covariate cov : covariateList) { cov.initialize(RAC); @@ -102,13 +101,13 @@ public class BQSRKeyManagerUnitTest { BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); for (int l = 0; l < read.getReadLength(); l++) { - for (int eventType = 0; eventType < EventType.values().length; eventType++) { - Long[] keySet = new Long[covariateList.size()]; + for (EventType eventType : EventType.values()) { + long[] keySet = new long[covariateList.size()]; Object[] expectedRequired = new Object[covariateList.size()]; Object[] expectedCovariate = new Object[covariateList.size()]; for (int j = 0; j < covariateList.size(); j++) { - keySet[j] = covariateKeys[j][eventType][l]; + keySet[j] = covariateKeys[j][eventType.index][l]; if (j < nRequired) expectedRequired[j] = covariateList.get(j).formatKey(keySet[j]); @@ -116,28 +115,38 @@ public class BQSRKeyManagerUnitTest { expectedCovariate[j - nRequired] = covariateList.get(j).formatKey(keySet[j]); } - Long[] hashKeys = keyManager.longsFromAllKeys(keySet, EventType.eventFrom(eventType)); - short cov = 0; - for (Long key : hashKeys) { - Object[] actual = keyManager.keySetFrom(key).toArray(); - - // Build the expected array - Object[] expected = new Object[nRequired + (optionalCovariates.size() > 0 ? 3 : 1)]; - System.arraycopy(expectedRequired, 0, expected, 0, nRequired); - if (optionalCovariates.size() > 0) { - expected[expected.length-3] = expectedCovariate[cov]; - expected[expected.length-2] = optionalCovariates.get(cov++).getClass().getSimpleName().split("Covariate")[0]; + if (optionalCovariates.size() == 0) { + final long masterKey = keyManager.createMasterKey(keySet, eventType, -1); + testKeys(keyManager, masterKey, nRequired, optionalCovariates, expectedRequired, expectedCovariate, eventType, -1); + } else { + for (int j = 0; j < optionalCovariates.size(); j++) { + final long masterKey = keyManager.createMasterKey(keySet, eventType, j); + testKeys(keyManager, masterKey, nRequired, optionalCovariates, expectedRequired, expectedCovariate, eventType, j); } - expected[expected.length-1] = EventType.eventFrom(eventType); + } + } + } + } + + private void testKeys(final BQSRKeyManager keyManager, final long key, final int nRequired, final List optionalCovariates, + final Object[] expectedRequired, final Object[] expectedCovariate, final EventType eventType, final int index) { + + Object[] actual = keyManager.keySetFrom(key).toArray(); + + // Build the expected array + Object[] expected = new Object[nRequired + (optionalCovariates.size() > 0 ? 3 : 1)]; + System.arraycopy(expectedRequired, 0, expected, 0, nRequired); + if (optionalCovariates.size() > 0) { + expected[expected.length-3] = expectedCovariate[index]; + expected[expected.length-2] = optionalCovariates.get(index).getClass().getSimpleName().split("Covariate")[0]; + } + expected[expected.length-1] = eventType; // System.out.println("Actual : " + Utils.join(",", Arrays.asList(actual))); // System.out.println("Expected: " + Utils.join(",", Arrays.asList(expected))); // System.out.println(); - for (int k = 0; k < expected.length; k++) - Assert.assertEquals(actual[k], expected[k]); - } - } - } + for (int k = 0; k < expected.length; k++) + Assert.assertEquals(actual[k], expected[k]); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java index 669c285cd..a2dbf0241 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java @@ -84,22 +84,20 @@ public class RecalibrationReportUnitTest { BQSRKeyManager keyManager = entry.getKey(); Map table = entry.getValue(); - for (Long key : keyManager.longsFromAllKeys(rc.getMismatchesKeySet(offset), EventType.BASE_SUBSTITUTION)) { - table.put(key, RecalDatum.createRandomRecalDatum(10000, 10)); - nKeys++; + final int numOptionalCovariates = keyManager.getNumOptionalCovariates(); + if (numOptionalCovariates == 0) { + table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_SUBSTITUTION, -1), RecalDatum.createRandomRecalDatum(10000, 10)); + table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_INSERTION, -1), RecalDatum.createRandomRecalDatum(100000, 10)); + table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_DELETION, -1), RecalDatum.createRandomRecalDatum(100000, 10)); + nKeys += 3; + } else { + for (int j = 0; j < numOptionalCovariates; j++) { + table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_SUBSTITUTION, j), RecalDatum.createRandomRecalDatum(10000, 10)); + table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_INSERTION, j), RecalDatum.createRandomRecalDatum(100000, 10)); + table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_DELETION, j), RecalDatum.createRandomRecalDatum(100000, 10)); + nKeys += 3; + } } - - for (Long key : keyManager.longsFromAllKeys(rc.getInsertionsKeySet(offset), EventType.BASE_INSERTION)) { - table.put(key, RecalDatum.createRandomRecalDatum(100000, 10)); - nKeys++; - } - - - for (Long key : keyManager.longsFromAllKeys(rc.getDeletionsKeySet(offset), EventType.BASE_DELETION)) { - table.put(key, RecalDatum.createRandomRecalDatum(100000, 10)); - nKeys++; - } - } } Assert.assertEquals(nKeys, expectedKeys); diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java index ad535df59..f70466d4f 100644 --- a/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java @@ -83,7 +83,7 @@ public class BaseRecalibrationUnitTest { readCovariates = RecalDataManager.computeCovariates(read, requestedCovariates); for (int i=0; i> mapEntry : keysAndTablesMap.entrySet()) { - Long[] keys = mapEntry.getKey().longsFromAllKeys(bitKeys, EventType.BASE_SUBSTITUTION); - for (Long key : keys) - updateCovariateWithKeySet(mapEntry.getValue(), key, newDatum); + final BQSRKeyManager keyManager = mapEntry.getKey(); + final int numOptionalCovariates = keyManager.getNumOptionalCovariates(); + if (numOptionalCovariates == 0) { + final long masterKey = keyManager.createMasterKey(bitKeys, EventType.BASE_SUBSTITUTION, -1); + updateCovariateWithKeySet(mapEntry.getValue(), masterKey, newDatum); + } else { + for (int j = 0; j < numOptionalCovariates; j++) { + final long masterKey = keyManager.createMasterKey(bitKeys, EventType.BASE_SUBSTITUTION, j); + updateCovariateWithKeySet(mapEntry.getValue(), masterKey, newDatum); + } + } } } dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_RECALIBRATED_Q_SCORE); @@ -122,7 +130,7 @@ public class BaseRecalibrationUnitTest { public void testGoldStandardComparison() { debugTables(); for (int i = 0; i < read.getReadLength(); i++) { - Long [] bitKey = readCovariates.getKeySet(i, EventType.BASE_SUBSTITUTION); + long [] bitKey = readCovariates.getKeySet(i, EventType.BASE_SUBSTITUTION); Object [] objKey = buildObjectKey(bitKey); byte v2 = baseRecalibration.performSequentialQualityCalculation(bitKey, EventType.BASE_SUBSTITUTION); byte v1 = goldStandardSequentialCalculation(objKey); @@ -130,7 +138,7 @@ public class BaseRecalibrationUnitTest { } } - private Object[] buildObjectKey(Long[] bitKey) { + private Object[] buildObjectKey(long[] bitKey) { Object[] key = new Object[bitKey.length]; key[0] = rgCovariate.formatKey(bitKey[0]); key[1] = qsCovariate.formatKey(bitKey[1]);