diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java index d91ddd221..01fa92b8c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGatherer.java @@ -71,11 +71,13 @@ public class BQSRGatherer extends Gatherer { if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) { File recal_out = new File(output.getName() + ".original"); RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport); - RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getKeysAndTablesMap(), generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES); + // TODO -- fix me + //RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getKeysAndTablesMap(), generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES); } else if (!RAC.NO_PLOTS) { File recal_out = new File(output.getName() + ".recal"); - RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES); + // TODO -- fix me + //RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES); } generalReport.output(outputFile); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java deleted file mode 100644 index 29eecfbb1..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java +++ /dev/null @@ -1,329 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; - -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.util.*; - -/** - * This class provides all the functionality for the BitSet representation of the keys to the hash table of BQSR - * - * It also handles the event type "covariate" which is not exactly a covariate, but is added as a key to the hashmap. The Key Manager will - * add the event type as a bitset to the end of the covariate bitset key. This way, it won't get int the way of masking the information - * out of the key for the actual covariates, and having the covariates handle it. The key manager handles the event type. - * - * The keys represented by this key manager will always have the same order: - * - * RequiredCovariate1, RequiredCovariate2, ..., RequiredCovariateN, OptionalCovariate1, OptionalCovariateID, EventType - * RequiredCovariate1, RequiredCovariate2, ..., RequiredCovariateN, OptionalCovariate2, OptionalCovariateID, EventType - * ... - * RequiredCovariate1, RequiredCovariate2, ..., RequiredCovariateN, OptionalCovariateN, OptionalCovariateID, EventType - * - * - * Note that Optional Covariates are optional, and the Key Manager should operate without them if necessary. - * - * @author Mauricio Carneiro - * @since 3/6/12 - */ -public class BQSRKeyManager { - - private final Covariate[] requiredCovariates; - private final Covariate[] optionalCovariates; - private final RequiredCovariateInfo[] requiredCovariatesInfo; - private final OptionalCovariateInfo[] optionalCovariatesInfo; - private final Map covariateNameToIDMap; - - private int nRequiredBits; // Number of bits used to represent the required covariates - - private final int optionalCovariateOffset; - private final int optionalCovariateIDOffset; - - private final long optionalCovariateMask; // Standard mask for optional covariates key - private final long optionalCovariateIDMask; // Standard mask for optional covariates order key - private final long eventIDMask; // Standard mask for event ID - - /** - * Initializes the KeyManager with the total number of covariates to use - * - * @param requiredCovariates the ordered list of required covariates - * @param optionalCovariates the ordered list of optional covariates - */ - public BQSRKeyManager(final List requiredCovariates, final List optionalCovariates) { - this.requiredCovariates = new Covariate[requiredCovariates.size()]; - this.optionalCovariates = new Covariate[optionalCovariates.size()]; - requiredCovariatesInfo = new RequiredCovariateInfo[requiredCovariates.size()]; // initialize the required covariates list - optionalCovariatesInfo = new OptionalCovariateInfo[optionalCovariates.size()]; // initialize the optional covariates list (size may be 0, it's okay) - covariateNameToIDMap = new HashMap(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates) - - nRequiredBits = 0; - for (int i = 0; i < requiredCovariates.size(); i++) { // create a list of required covariates with the extra information for key management - final Covariate required = requiredCovariates.get(i); - final int nBits = required.numberOfBits(); // number of bits used by this covariate - final long mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate - this.requiredCovariates[i] = required; - requiredCovariatesInfo[i] = new RequiredCovariateInfo(nBits, nRequiredBits, mask, required); // Create an object for this required covariate - nRequiredBits += nBits; - } - - final int bitsInEventType = numberOfBitsToRepresent(EventType.values().length); - eventIDMask = genericMask(nRequiredBits, bitsInEventType); - - short id = 0; - int nOptionalBits = 0; - for (int i = 0; i < optionalCovariates.size(); i++) { - final Covariate optional = optionalCovariates.get(i); - nOptionalBits = Math.max(nOptionalBits, optional.numberOfBits()); // optional covariates are represented by the number of bits needed by biggest covariate - this.optionalCovariates[i] = optional; - optionalCovariatesInfo[i] = new OptionalCovariateInfo(id, optional); - final String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport - covariateNameToIDMap.put(covariateName, id); - id++; - } - - optionalCovariateOffset = nRequiredBits + bitsInEventType; - optionalCovariateMask = genericMask(optionalCovariateOffset, nOptionalBits); // the generic mask to extract optional covariate bits from the combined bitset - optionalCovariateIDOffset = nRequiredBits + bitsInEventType + nOptionalBits; - final int nOptionalIDBits = numberOfBitsToRepresent(optionalCovariates.size()); // number of bits used to represent the covariate ID - optionalCovariateIDMask = genericMask(optionalCovariateIDOffset, nOptionalIDBits); // the generic mask to extract optional covariate ID bits from the combined bitset - - final int totalNumberOfBits = optionalCovariateIDOffset + nOptionalIDBits; // total number of bits used in the final key - if ( totalNumberOfBits > 64 ) - throw new UserException.BadInput("The total number of bits used for the master BQSR key is greater than 64 and cannot be represented in a long"); - } - - /** - * Generates one key given the optional covariate (or none if it is null) - * - * Keys include all required covariates, the standard covariate and the event type. - * - * @param allKeys The keys in long representation for each covariate (includes all optional covariates, not just the one requested) - * @param eventType The type of event described by this keyset (e.g. mismatches, insertions, deletions) - * @return one key in long representation (non-negative) or -1 for a bad key - */ - public long createMasterKey(final long[] allKeys, final EventType eventType, final int optionalCovariateIndex) { - - int keyIndex = 0; - long masterKey = 0L; // This will be a master key holding all the required keys, to replicate later on - for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo) - masterKey |= (allKeys[keyIndex++] << infoRequired.offset); - - final long eventKey = keyFromEvent(eventType); // create a key for the event type - masterKey |= (eventKey << nRequiredBits); - - if (optionalCovariateIndex >= 0 && optionalCovariateIndex < optionalCovariates.length) { - final long covariateKey = allKeys[keyIndex + optionalCovariateIndex]; - if (covariateKey < 0) // do not add "nulls" to the final set of keys - return -1; - - masterKey |= (covariateKey << optionalCovariateOffset); - masterKey |= (optionalCovariatesInfo[optionalCovariateIndex].covariateID << optionalCovariateIDOffset); - } - - return masterKey; - } - - /** - * Generates one key for the covariates represented in Object[] key - * - * The covariates will have the actual objects produced by the covariates (probably read from the recalibration data file) - * and will contain all required covariates and one (or none) optional covariates. Therefore, the product is one key, not many. - * - * Example key: - * RG, QUAL, CYCLE, CYCLE_ID, EventType - * - * @param key list of objects produced by the required covariates followed by one or zero optional covariates. - * @return a key representing these objects. - */ - public long longFromKey(Object[] key) { - int requiredCovariate = 0; - long masterKey = 0L; // This will be a master key holding all the required keys, to replicate later on - for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo) - masterKey |= (infoRequired.covariate.longFromKey(key[requiredCovariate++]) << infoRequired.offset); - - final int eventIndex = key.length - 1; // the event type is always the last key - final long eventKey = keyFromEvent((EventType) key[eventIndex]); // create a key for the event type - masterKey |= (eventKey << nRequiredBits); - - if (optionalCovariatesInfo.length > 0) { - final int covariateIndex = requiredCovariatesInfo.length; // the optional covariate index in the key array - final int covariateIDIndex = covariateIndex + 1; // the optional covariate ID index is right after the optional covariate's - final short covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index - final OptionalCovariateInfo infoOptional = optionalCovariatesInfo[covariateID]; // so we can get the optional covariate information - - final long covariateKey = infoOptional.covariate.longFromKey(key[covariateIndex]); // convert the optional covariate key into a bitset using the covariate's interface - masterKey |= (covariateKey << optionalCovariateOffset); - masterKey |= (infoOptional.covariateID << optionalCovariateIDOffset); - } - - return masterKey; - } - - /** - * Covariate id can be either the covariate name (String) or the actual id (short). This method - * finds it's type and converts accordingly to the short notation. - * - * @param id the string or short representation of the optional covariate id - * @return the short representation of the optional covariate id. - */ - private short parseCovariateID(final Object id) { - return (id instanceof String) ? covariateNameToIDMap.get(id.toString()) : (Short) id; - } - - /** - * Generates a key set of objects from a combined master key. - * - * Masks out each covariate independently and decodes their values (Object) into a keyset - * - * @param master the master representation of the keys - * @return an object array with the values for each key - */ - public List keySetFrom(final long master) { - final List objectKeys = new ArrayList(); - for (RequiredCovariateInfo info : requiredCovariatesInfo) { - final long covariateKey = extractKeyFromMaster(master, info.mask, info.offset); // get the covariate's key - objectKeys.add(info.covariate.formatKey(covariateKey)); // convert the key to object using covariate's interface - } - - if (optionalCovariatesInfo.length > 0) { - final long covKey = extractKeyFromMaster(master, optionalCovariateMask, optionalCovariateOffset); // get the covariate's key - final int covIDKey = (int)extractKeyFromMaster(master, optionalCovariateIDMask, optionalCovariateIDOffset); // get the covariate's id (to identify which covariate this is) - Covariate covariate = optionalCovariatesInfo[(short)covIDKey].covariate; // get the corresponding optional covariate object - objectKeys.add(covariate.formatKey(covKey)); // add the optional covariate key to the key set - objectKeys.add(covariate.getClass().getSimpleName().split("Covariate")[0]); // add the covariate name using the id - } - - objectKeys.add(EventType.eventFrom((int)extractKeyFromMaster(master, eventIDMask, nRequiredBits))); // add the event type object to the key set - - return objectKeys; - } - - public Covariate[] getRequiredCovariates() { - return requiredCovariates; - } - - public Covariate[] getOptionalCovariates() { - return optionalCovariates; - } - - public int getNumRequiredCovariates() { - return requiredCovariates.length; - } - - public int getNumOptionalCovariates() { - return optionalCovariates.length; - } - - /** - * Creates a mask for the requested covariate to extract the relevant key from a combined master key - * - * @param offset the offset into the master key - * @param nBits the number of bits needed by the Covariate to represent its values - * @return the mask relevant to the covariate - */ - private long genericMask(final int offset, final int nBits) { - long mask = 0L; - for ( int i = 0; i < nBits; i++ ) - mask |= 1L << (offset+i); - return mask; - } - - private long extractKeyFromMaster(final long master, final long mask, final int offset) { - long key = master & mask; - return key >> offset; - } - - // cache the key representing an event since it's otherwise created a massive amount of times - private static final long[] eventTypeCache = new long[EventType.values().length]; // event IDs must be longs so that bit-fiddling works - static { - for (final EventType eventType : EventType.values()) - eventTypeCache[eventType.index] = (long)eventType.index; - } - - private long keyFromEvent(final EventType eventType) { - return eventTypeCache[eventType.index]; - } - - @Override - public boolean equals(Object o) { - if (!(o instanceof BQSRKeyManager)) - return false; - - BQSRKeyManager other = (BQSRKeyManager) o; - if (this == other) - return true; - - if (requiredCovariatesInfo.length != other.requiredCovariatesInfo.length || - optionalCovariatesInfo.length != other.optionalCovariatesInfo.length) - return false; - - for (int i = 0; i < requiredCovariates.length; i++) { - Covariate myRequiredCovariate = requiredCovariates[i]; - Covariate otherRequiredCovariate = other.requiredCovariates[i]; - String thisName = myRequiredCovariate.getClass().getSimpleName(); - String otherName = otherRequiredCovariate.getClass().getSimpleName(); - if (!thisName.equals(otherName)) - return false; - } - - for (int i = 0; i < optionalCovariates.length; i++) { - Covariate myOptionalCovariate = optionalCovariates[i]; - Covariate otherOptionalCovariate = other.optionalCovariates[i]; - String thisName = myOptionalCovariate.getClass().getSimpleName(); - String otherName = otherOptionalCovariate.getClass().getSimpleName(); - if (!thisName.equals(otherName)) - return false; - } - - return true; - } - - /** - * Calculates the number of bits necessary to represent a given number of elements - * - * @param numberOfElements the number of elements to represent (must be positive) - * @return the number of bits necessary to represent this many elements - */ - public static int numberOfBitsToRepresent(long numberOfElements) { - if (numberOfElements < 0) - throw new ReviewedStingException("Number of elements must be positive: " + numberOfElements); - - if (numberOfElements == 1L) - return 1; // special case - - int n = 0; - numberOfElements--; - while (numberOfElements > 0) { - numberOfElements = numberOfElements >> 1; - n++; - } - return n; - } - - /** - * Aggregate information for each Covariate - */ - private static class RequiredCovariateInfo { - public final int nBits; // number of bits for this key - public final int offset; // the offset into the master key - public final long mask; // the mask to pull out this covariate from the combined bitset key ( a mask made from bitsBefore and nBits ) - public final Covariate covariate; // this allows reverse lookup of the Covariates in order - - RequiredCovariateInfo(final int nBits, final int offset, final long mask, final Covariate covariate) { - this.nBits = nBits; - this.offset = offset; - this.mask = mask; - this.covariate = covariate; - } - } - - private static class OptionalCovariateInfo { - public final long covariateID; // cache the covariate ID (must be a long so that bit-fiddling works) - public final Covariate covariate; - - OptionalCovariateInfo(final long covariateID, final Covariate covariate) { - this.covariateID = covariateID; - this.covariate = covariate; - } - } - -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java index fae2ac898..365c816c7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java @@ -43,6 +43,9 @@ public class ContextCovariate implements StandardCovariate { private int mismatchesContextSize; private int indelsContextSize; + // the maximum context size (number of bases) permitted; we need to keep the leftmost base free so that values are + // not negative and we reserve 4 more bits to represent the length of the context; it takes 2 bits to encode one base. + static final private int MAX_DNA_CONTEXT = 13; private byte LOW_QUAL_TAIL; // Initialize any member variables using the command-line arguments passed to the walkers @@ -64,6 +67,7 @@ public class ContextCovariate implements StandardCovariate { @Override public void recordValues(final GATKSAMRecord read, final ReadCovariates values) { + // TODO -- wrong: fix me final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag(); @@ -73,7 +77,7 @@ public class ContextCovariate implements StandardCovariate { final int readLength = clippedRead.getReadLength(); for (int i = 0; i < readLength; i++) { - final long indelKey = contextWith(bases, i, indelsContextSize); + final int indelKey = contextWith(bases, i, indelsContextSize); values.addCovariate(contextWith(bases, i, mismatchesContextSize), indelKey, indelKey, (negativeStrand ? readLength - i - 1 : i)); } } @@ -85,7 +89,7 @@ public class ContextCovariate implements StandardCovariate { } @Override - public String formatKey(final long key) { + public String formatKey(final int key) { if (key == -1) // this can only happen in test routines because we do not propagate null keys to the csv file return null; @@ -93,13 +97,8 @@ public class ContextCovariate implements StandardCovariate { } @Override - public long longFromKey(Object key) { - return keyFromContext((String) key); - } - - @Override - public int numberOfBits() { - return Integer.bitCount(Integer.MAX_VALUE); + public int keyFromValue(final Object value) { + return keyFromContext((String) value); } /** @@ -110,130 +109,61 @@ public class ContextCovariate implements StandardCovariate { * @param contextSize context size to use building the context * @return the key representing the context */ - private long contextWith(final byte[] bases, final int offset, final int contextSize) { + private int contextWith(final byte[] bases, final int offset, final int contextSize) { final int start = offset - contextSize + 1; - final long result; - if (start >= 0) - result = keyFromContext(bases, start, offset + 1); - else - result = -1L; - return result; + return (start >= 0) ? keyFromContext(bases, start, offset + 1) : -1; } - public static long keyFromContext(final String dna) { + public static int keyFromContext(final String dna) { return keyFromContext(dna.getBytes(), 0, dna.length()); } /** - * Creates a long representation of a given dna string. + * Creates a int representation of a given dna string. * - * Warning: This conversion is limited to long precision, therefore the dna sequence cannot - * be longer than 31 bases. - * - * The bit representation of a dna string is the simple: - * 0 A 4 AA 8 CA - * 1 C 5 AC ... - * 2 G 6 AG 1343 TTGGT - * 3 T 7 AT 1364 TTTTT - * - * To convert from dna to number, we convert the dna string to base10 and add all combinations that - * preceded the string (with smaller lengths). - * - * @param dna the dna sequence + * @param dna the dna sequence + * @param start the start position in the byte array (inclusive) + * @param end the end position in the array (exclusive) * @return the key representing the dna sequence */ - public static long keyFromContext(final byte[] dna, final int start, final int end) { - final long preContext = combinationsPerLength[end - start - 1]; // the sum of all combinations that preceded the length of the dna string - long baseTen = 0L; // the number in base_10 that we are going to use to generate the bit set + public static int keyFromContext(final byte[] dna, final int start, final int end) { + + // TODO -- bit fiddle to ge this all working in a single call to the method (mask out length, shift, OR length back in) + + int key = end - start; + int bitOffset = 4; for (int i = start; i < end; i++) { - baseTen = (baseTen << 2); // multiply by 4 final int baseIndex = BaseUtils.simpleBaseToBaseIndex(dna[i]); if (baseIndex == -1) // ignore non-ACGT bases - return -1L; - baseTen += (long)baseIndex; + return -1; + key |= (baseIndex << bitOffset); + bitOffset += 2; } - return baseTen + preContext; // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length. - } - - static final private int MAX_DNA_CONTEXT = 31; // the maximum context size (number of bases) permitted in the "long bitset" implementation of the DNA <=> BitSet conversion. - static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1]; // keeps the memoized table with the number of combinations for each given DNA context length - static { - for (int i = 0; i < MAX_DNA_CONTEXT + 1; i++) - computeCombinationsFor(i); - } - - /** - * The sum of all combinations of a context of a given length from length = 0 to length. - * - * Memoized implementation of sum(4^i) , where i=[0,length] - * - * @param length the length of the DNA context - */ - private static void computeCombinationsFor(final int length) { - long combinations = 0L; - for (int i = 1; i <= length; i++) - combinations += (1L << 2 * i); // add all combinations with 4^i ( 4^i is the same as 2^(2*i) ) - combinationsPerLength[length] = combinations; + return key; } /** * Converts a key into the dna string representation. * - * Warning: This conversion is limited to long precision, therefore the dna sequence cannot - * be longer than 31 bases. - * - * We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the - * base_10 representation of the sequence. This is important for us to know how to bring the number - * to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented - * as 0's and leading 0's are omitted). - * - * quasi-canonical because A is represented by a 0, therefore, - * instead of : 0, 1, 2, 3, 10, 11, 12, ... - * we have : 0, 1, 2, 3, 00, 01, 02, ... - * - * but we can correctly decode it because we know the final length. - * * @param key the key representing the dna sequence * @return the dna sequence represented by the key */ - public static String contextFromKey(long key) { + public static String contextFromKey(final int key) { if (key < 0) throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?"); - final int length = contextLengthFor(key); // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls) - key -= combinationsPerLength[length - 1]; // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation + final int length = key & 15; // the first 4 bits represent the length (in bp) of the context + int mask = 48; // use the mask to pull out bases + int offset = 4; StringBuilder dna = new StringBuilder(); - while (key > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical) - final byte base = (byte) (key & 3); // equivalent to (key % 4) - dna.append((char)BaseUtils.baseIndexToSimpleBase(base)); - key = key >> 2; // divide by 4 + for (int i = 0; i < length; i++) { + final int baseIndex = (key & mask) >> offset; + dna.append((char)BaseUtils.baseIndexToSimpleBase(baseIndex)); + mask = mask << 2; // move the mask over to the next 2 bits + offset += 2; } - for (int j = dna.length(); j < length; j++) - dna.append('A'); // add leading A's as necessary (due to the "quasi" canonical status, see description above) - return dna.reverse().toString(); // make sure to reverse the string since we should have been pre-pending all along - } - - /** - * Calculates the length of the DNA context for a given base 10 number - * - * It is important to know the length given the base 10 number to calculate the number of combinations - * and to disambiguate the "quasi-canonical" state. - * - * This method also calculates the number of combinations as a by-product, but since it memoizes the - * results, a subsequent call to combinationsFor(length) is O(1). - * - * @param number the base 10 representation of the key - * @return the length of the DNA context represented by this number - */ - private static int contextLengthFor(final long number) { - int length = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet. - long combinations = combinationsPerLength[length]; // the next context (we advance it so we know which one was preceding it). - while (combinations <= number) { // find the length of the dna string (length) - length++; - combinations = combinationsPerLength[length]; // calculate the next context - } - return length; + return dna.toString(); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java index ff86220b8..4b959eea4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java @@ -67,7 +67,7 @@ public interface Covariate { * @param key the long representation of the key * @return a string representation of the key */ - public String formatKey(final long key); + public String formatKey(final int key); /** * Converts an Object key into a long key using only the lowest numberOfBits() bits @@ -75,18 +75,10 @@ public interface Covariate { * Only necessary for on-the-fly recalibration when you have the object, but need to store it in memory in long format. For counting covariates * the getValues method already returns all values in long format. * - * @param key the object corresponding to the covariate + * @param value the object corresponding to the covariate * @return a long representation of the object */ - public long longFromKey(final Object key); - - /** - * Each covariate should determine how many bits are necessary to encode it's data - * - * @return The number of bits used to represent the values of this covariate. - */ - public int numberOfBits(); - + public int keyFromValue(final Object value); } interface RequiredCovariate extends Covariate {} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java index 3e91ca539..3c917388c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java @@ -79,7 +79,7 @@ public class CycleCovariate implements StandardCovariate { final int CUSHION = 4; final int MAX_CYCLE = readLength - CUSHION - 1; for (int i = 0; i < readLength; i++) { - final long key = (iMAX_CYCLE) ? -1L : keyFromCycle(cycle); + final int key = (iMAX_CYCLE) ? -1 : keyFromCycle(cycle); values.addCovariate(key, key, key, i); cycle += increment; } @@ -106,22 +106,22 @@ public class CycleCovariate implements StandardCovariate { int iii = 0; while (iii < readLength) { while (iii < readLength && bases[iii] == (byte) 'T') { - final long key = keyFromCycle(cycle); + final int key = keyFromCycle(cycle); values.addCovariate(key, key, key, iii); iii++; } while (iii < readLength && bases[iii] == (byte) 'A') { - final long key = keyFromCycle(cycle); + final int key = keyFromCycle(cycle); values.addCovariate(key, key, key, iii); iii++; } while (iii < readLength && bases[iii] == (byte) 'C') { - final long key = keyFromCycle(cycle); + final int key = keyFromCycle(cycle); values.addCovariate(key, key, key, iii); iii++; } while (iii < readLength && bases[iii] == (byte) 'G') { - final long key = keyFromCycle(cycle); + final int key = keyFromCycle(cycle); values.addCovariate(key, key, key, iii); iii++; } @@ -132,7 +132,7 @@ public class CycleCovariate implements StandardCovariate { cycle++; } if (iii < readLength && !BaseUtils.isRegularBase(bases[iii])) { - final long key = keyFromCycle(cycle); + final int key = keyFromCycle(cycle); values.addCovariate(key, key, key, iii); iii++; } @@ -143,22 +143,22 @@ public class CycleCovariate implements StandardCovariate { int iii = readLength - 1; while (iii >= 0) { while (iii >= 0 && bases[iii] == (byte) 'T') { - final long key = keyFromCycle(cycle); + final int key = keyFromCycle(cycle); values.addCovariate(key, key, key, iii); iii--; } while (iii >= 0 && bases[iii] == (byte) 'A') { - final long key = keyFromCycle(cycle); + final int key = keyFromCycle(cycle); values.addCovariate(key, key, key, iii); iii--; } while (iii >= 0 && bases[iii] == (byte) 'C') { - final long key = keyFromCycle(cycle); + final int key = keyFromCycle(cycle); values.addCovariate(key, key, key, iii); iii--; } while (iii >= 0 && bases[iii] == (byte) 'G') { - final long key = keyFromCycle(cycle); + final int key = keyFromCycle(cycle); values.addCovariate(key, key, key, iii); iii--; } @@ -169,7 +169,7 @@ public class CycleCovariate implements StandardCovariate { cycle++; } if (iii >= 0 && !BaseUtils.isRegularBase(bases[iii])) { - final long key = keyFromCycle(cycle); + final int key = keyFromCycle(cycle); values.addCovariate(key, key, key, iii); iii--; } @@ -190,26 +190,21 @@ public class CycleCovariate implements StandardCovariate { } @Override - public String formatKey(final long key) { - long cycle = key >> 1; // shift so we can remove the "sign" bit + public String formatKey(final int key) { + int cycle = key >> 1; // shift so we can remove the "sign" bit if ( (key & 1) != 0 ) // is the last bit set? cycle *= -1; // then the cycle is negative return String.format("%d", cycle); } @Override - public long longFromKey(final Object key) { - return (key instanceof String) ? keyFromCycle(Integer.parseInt((String) key)) : keyFromCycle((Integer) key); + public int keyFromValue(final Object value) { + return (value instanceof String) ? keyFromCycle(Integer.parseInt((String) value)) : keyFromCycle((Integer) value); } - @Override - public int numberOfBits() { - return Integer.bitCount(Integer.MAX_VALUE); - } - - private static long keyFromCycle(final int cycle) { + private static int keyFromCycle(final int cycle) { // no negative values because values must fit into the first few bits of the long - long result = Math.abs(cycle); + int result = Math.abs(cycle); result = result << 1; // shift so we can add the "sign" bit if ( cycle < 0 ) result++; // negative cycles get the lower-most bit set diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java index c60ca38e1..8ee980124 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; -import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /* @@ -49,7 +48,7 @@ public class QualityScoreCovariate implements RequiredCovariate { final byte[] baseDeletionQualities = read.getBaseDeletionQualities(); for (int i = 0; i < baseQualities.length; i++) { - values.addCovariate((long)baseQualities[i], (long)baseInsertionQualities[i], (long)baseDeletionQualities[i], i); + values.addCovariate((int)baseQualities[i], (int)baseInsertionQualities[i], (int)baseDeletionQualities[i], i); } } @@ -60,17 +59,12 @@ public class QualityScoreCovariate implements RequiredCovariate { } @Override - public String formatKey(final long key) { + public String formatKey(final int key) { return String.format("%d", key); } @Override - public long longFromKey(final Object key) { - return (key instanceof String) ? (long)Byte.parseByte((String) key) : (long)(Byte) key; + public int keyFromValue(final Object value) { + return (value instanceof String) ? (int)Byte.parseByte((String) value) : (int)(Byte) value; } - - @Override - public int numberOfBits() { - return BQSRKeyManager.numberOfBitsToRepresent(QualityUtils.MAX_QUAL_SCORE); - } -} +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java index 02339330b..541f3a0a5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java @@ -1,13 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.recalibration.QualQuantizer; +import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import java.util.Arrays; import java.util.List; -import java.util.Map; /** * Class that encapsulates the information necessary for quality score quantization for BQSR @@ -30,25 +31,17 @@ public class QuantizationInfo { this(quantizedQuals, empiricalQualCounts, calculateQuantizationLevels(quantizedQuals)); } - public QuantizationInfo(Map> keysAndTablesMap, int quantizationLevels) { + public QuantizationInfo(final RecalibrationTables recalibrationTables, final int quantizationLevels) { final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution for (int i = 0; i < qualHistogram.length; i++) qualHistogram[i] = 0L; - Map qualTable = null; // look for the quality score table - for (Map.Entry> entry : keysAndTablesMap.entrySet()) { - BQSRKeyManager keyManager = entry.getKey(); - if (keyManager.getNumRequiredCovariates() == 2) // it should be the only one with 2 required covariates - qualTable = entry.getValue(); - } + final NestedHashMap qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); // get the quality score table - if (qualTable == null) - throw new ReviewedStingException("Could not find QualityScore table."); - - for (RecalDatum datum : qualTable.values()) { - int empiricalQual = (int) Math.round(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL ) - long nObservations = datum.numObservations; - qualHistogram[empiricalQual] += nObservations; // add the number of observations for every key + for (final Object value : qualTable.getAllValues()) { + final RecalDatum datum = (RecalDatum)value; + final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL ) + qualHistogram[empiricalQual] += datum.numObservations; // add the number of observations for every key } empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities quantizeQualityScores(quantizationLevels); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java index c9043dc04..5e907237d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java @@ -1,7 +1,5 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - /** * The object temporarily held by a read that describes all of it's covariates. * @@ -11,65 +9,56 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; * @since 2/8/12 */ public class ReadCovariates { - private final long[][] mismatchesKeySet; - private final long[][] insertionsKeySet; - private final long[][] deletionsKeySet; + private final int[][][] keys; private int currentCovariateIndex = 0; - public ReadCovariates(int readLength, int numberOfCovariates) { - this.mismatchesKeySet = new long[readLength][numberOfCovariates]; - this.insertionsKeySet = new long[readLength][numberOfCovariates]; - this.deletionsKeySet = new long[readLength][numberOfCovariates]; + public ReadCovariates(final int readLength, final int numberOfCovariates) { + keys = new int[EventType.values().length][readLength][numberOfCovariates]; } public void setCovariateIndex(final int index) { currentCovariateIndex = index; } - public void addCovariate(final long mismatch, final long insertion, final long deletion, final int readOffset) { - mismatchesKeySet[readOffset][currentCovariateIndex] = mismatch; - insertionsKeySet[readOffset][currentCovariateIndex] = insertion; - deletionsKeySet[readOffset][currentCovariateIndex] = deletion; + public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) { + keys[EventType.BASE_SUBSTITUTION.index][readOffset][currentCovariateIndex] = mismatch; + keys[EventType.BASE_INSERTION.index][readOffset][currentCovariateIndex] = insertion; + keys[EventType.BASE_DELETION.index][readOffset][currentCovariateIndex] = deletion; } - public long[] getKeySet(final int readPosition, final EventType errorModel) { - switch (errorModel) { - case BASE_SUBSTITUTION: - return getMismatchesKeySet(readPosition); - case BASE_INSERTION: - return getInsertionsKeySet(readPosition); - case BASE_DELETION: - return getDeletionsKeySet(readPosition); - default: - throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel); - } + public int[] getKeySet(final int readPosition, final EventType errorModel) { + return keys[errorModel.index][readPosition]; } - public long[] getMismatchesKeySet(final int readPosition) { - return mismatchesKeySet[readPosition]; + public int[][] getKeySet(final EventType errorModel) { + return keys[errorModel.index]; } - public long[] getInsertionsKeySet(final int readPosition) { - return insertionsKeySet[readPosition]; + public int[] getMismatchesKeySet(final int readPosition) { + return keys[EventType.BASE_SUBSTITUTION.index][readPosition]; } - public long[] getDeletionsKeySet(final int readPosition) { - return deletionsKeySet[readPosition]; + public int[] getInsertionsKeySet(final int readPosition) { + return keys[EventType.BASE_INSERTION.index][readPosition]; + } + + public int[] getDeletionsKeySet(final int readPosition) { + return keys[EventType.BASE_DELETION.index][readPosition]; } /** * Testing routines */ - protected long[][] getMismatchesKeySet() { - return mismatchesKeySet; + protected int[][] getMismatchesKeySet() { + return keys[EventType.BASE_SUBSTITUTION.index]; } - protected long[][] getInsertionsKeySet() { - return insertionsKeySet; + protected int[][] getInsertionsKeySet() { + return keys[EventType.BASE_INSERTION.index]; } - protected long[][] getDeletionsKeySet() { - return deletionsKeySet; + protected int[][] getDeletionsKeySet() { + return keys[EventType.BASE_DELETION.index]; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java index ae0ef38cc..c086ef6d9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java @@ -40,9 +40,9 @@ import java.util.HashMap; public class ReadGroupCovariate implements RequiredCovariate { - private final HashMap readGroupLookupTable = new HashMap(); - private final HashMap readGroupReverseLookupTable = new HashMap(); - private long nextId = 0L; + private final HashMap readGroupLookupTable = new HashMap(); + private final HashMap readGroupReverseLookupTable = new HashMap(); + private int nextId = 0; // Initialize any member variables using the command-line arguments passed to the walkers @Override @@ -51,7 +51,7 @@ public class ReadGroupCovariate implements RequiredCovariate { @Override public void recordValues(final GATKSAMRecord read, final ReadCovariates values) { final String readGroupId = readGroupValueFromRG(read.getReadGroup()); - final long key = keyForReadGroup(readGroupId); + final int key = keyForReadGroup(readGroupId); final int l = read.getReadLength(); for (int i = 0; i < l; i++) @@ -64,21 +64,16 @@ public class ReadGroupCovariate implements RequiredCovariate { } @Override - public String formatKey(final long key) { + public String formatKey(final int key) { return readGroupReverseLookupTable.get(key); } @Override - public long longFromKey(Object key) { - return keyForReadGroup((String) key); + public int keyFromValue(final Object value) { + return keyForReadGroup((String) value); } - @Override - public int numberOfBits() { - return BQSRKeyManager.numberOfBitsToRepresent(Short.MAX_VALUE); - } - - private long keyForReadGroup(final String readGroupId) { + private int keyForReadGroup(final String readGroupId) { if (!readGroupLookupTable.containsKey(readGroupId)) { readGroupLookupTable.put(readGroupId, nextId); readGroupReverseLookupTable.put(nextId, readGroupId); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java index 1356ffa94..ec82da95f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java @@ -32,11 +32,13 @@ import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.R.RScriptExecutor; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.io.Resource; +import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -82,6 +84,14 @@ public class RecalDataManager { private static final String SCRIPT_FILE = "BQSR.R"; + private static final Pair covariateValue = new Pair(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME, "%s"); + private static final Pair covariateName = new Pair(RecalDataManager.COVARIATE_NAME_COLUMN_NAME, "%s"); + private static final Pair eventType = new Pair(RecalDataManager.EVENT_TYPE_COLUMN_NAME, "%s"); + private static final Pair empiricalQuality = new Pair(RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f"); + private static final Pair estimatedQReported = new Pair(RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f"); + private static final Pair nObservations = new Pair(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d"); + private static final Pair nErrors = new Pair(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME, "%d"); + public enum SOLID_RECAL_MODE { /** @@ -141,30 +151,6 @@ public class RecalDataManager { } } - - /** - * Initializes the recalibration table -> key manager map - * - * @param requiredCovariates list of required covariates (in order) - * @param optionalCovariates list of optional covariates (in order) - * @return a map with each key manager and it's corresponding recalibration table properly initialized - */ - public static LinkedHashMap> initializeTables(ArrayList requiredCovariates, ArrayList optionalCovariates) { - final LinkedHashMap> tablesAndKeysMap = new LinkedHashMap>(); - final ArrayList requiredCovariatesToAdd = new ArrayList(requiredCovariates.size() + 1); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates. - final ArrayList optionalCovariatesToAdd = new ArrayList(); // initialize an empty array of optional covariates to create the first few tables - for (Covariate covariate : requiredCovariates) { - requiredCovariatesToAdd.add(covariate); - final Map recalTable = new HashMap(); // initializing a new recal table for each required covariate (cumulatively) - final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager - tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map - } - final Map recalTable = new HashMap(Short.MAX_VALUE); // initializing a new recal table to hold all optional covariates - final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager - tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map - return tablesAndKeysMap; - } - /** * Generates two lists : required covariates and optional covariates based on the user's requests. * @@ -223,42 +209,29 @@ public class RecalDataManager { logger.info(""); } - private static List generateReportTables(Map> keysAndTablesMap) { + private static List generateReportTables(final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) { List result = new LinkedList(); int tableIndex = 0; - final Pair covariateValue = new Pair(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME, "%s"); - final Pair covariateName = new Pair(RecalDataManager.COVARIATE_NAME_COLUMN_NAME, "%s"); - final Pair eventType = new Pair(RecalDataManager.EVENT_TYPE_COLUMN_NAME, "%s"); - final Pair empiricalQuality = new Pair(RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f"); - final Pair estimatedQReported = new Pair(RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f"); - final Pair nObservations = new Pair(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d"); - final Pair nErrors = new Pair(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME, "%d"); + final Map covariateNameMap = new HashMap(requestedCovariates.length); + for (final Covariate covariate : requestedCovariates) + covariateNameMap.put(covariate, parseCovariateName(covariate)); - for (Map.Entry> entry : keysAndTablesMap.entrySet()) { - final BQSRKeyManager keyManager = entry.getKey(); - final Map recalTable = entry.getValue(); + for (final RecalibrationTables.TableType type : RecalibrationTables.TableType.values()) { - final boolean isReadGroupTable = tableIndex == 0; // special case for the read group table so we can print the extra column it needs. - - final Covariate[] requiredList = keyManager.getRequiredCovariates(); // ask the key manager what required covariates were used in this recal table - final Covariate[] optionalList = keyManager.getOptionalCovariates(); // ask the key manager what optional covariates were used in this recal table - - final ArrayList> columnNames = new ArrayList>(); // initialize the array to hold the column names - - for (final Covariate covariate : requiredList) { - final String name = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the covariate names and put them in order - columnNames.add(new Pair(name, "%s")); // save the required covariate name so we can reference it in the future - } - - if (optionalList.length > 0) { - columnNames.add(covariateValue); - columnNames.add(covariateName); + final ArrayList> columnNames = new ArrayList>(); // initialize the array to hold the column names + columnNames.add(new Pair(covariateNameMap.get(requestedCovariates[0]), "%s")); // save the required covariate name so we can reference it in the future + if (type != RecalibrationTables.TableType.READ_GROUP_TABLE) { + columnNames.add(new Pair(covariateNameMap.get(requestedCovariates[1]), "%s")); // save the required covariate name so we can reference it in the future + if (type == RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLE) { + columnNames.add(covariateValue); + columnNames.add(covariateName); + } } columnNames.add(eventType); // the order of these column names is important here columnNames.add(empiricalQuality); - if (isReadGroupTable) + if (type == RecalibrationTables.TableType.READ_GROUP_TABLE) columnNames.add(estimatedQReported); // only the read group table needs the estimated Q reported columnNames.add(nObservations); columnNames.add(nErrors); @@ -269,42 +242,59 @@ public class RecalDataManager { int rowIndex = 0; - for (Map.Entry recalTableEntry : recalTable.entrySet()) { // create a map with column name => key value for all covariate keys - final Long bitSetKey = recalTableEntry.getKey(); - final Map columnData = new HashMap(columnNames.size()); - final Iterator> iterator = columnNames.iterator(); - for (final Object key : keyManager.keySetFrom(bitSetKey)) { - final String columnName = iterator.next().getFirst(); - columnData.put(columnName, key); - } - final RecalDatum datum = recalTableEntry.getValue(); - columnData.put(iterator.next().getFirst(), datum.getEmpiricalQuality()); - if (isReadGroupTable) - columnData.put(iterator.next().getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table - columnData.put(iterator.next().getFirst(), datum.numObservations); - columnData.put(iterator.next().getFirst(), datum.numMismatches); + final NestedHashMap table = recalibrationTables.getTable(type); + for (final NestedHashMap.Leaf row : table.getAllLeaves()) { + final RecalDatum datum = (RecalDatum)row.value; + final List keys = row.keys; - for (final Map.Entry dataEntry : columnData.entrySet()) { - final String columnName = dataEntry.getKey(); - final Object value = dataEntry.getValue(); - reportTable.set(rowIndex, columnName, value.toString()); + int columnIndex = 0; + setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex).getFirst(), requestedCovariates[0].formatKey((Integer)keys.get(columnIndex++))); + if (type != RecalibrationTables.TableType.READ_GROUP_TABLE) { + setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex).getFirst(), requestedCovariates[1].formatKey((Integer) keys.get(columnIndex++))); + if (type == RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLE) { + final int covariateIndex = (Integer)keys.get(columnIndex); + final Covariate covariate = requestedCovariates[2 + covariateIndex]; + final int covariateKey = (Integer)keys.get(columnIndex+1); + + setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), covariate.formatKey(covariateKey)); + setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), covariateNameMap.get(covariate)); + } } + + final EventType event = EventType.eventFrom((Integer)keys.get(columnIndex)); + setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), event); + + setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality()); + if (type == RecalibrationTables.TableType.READ_GROUP_TABLE) + setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table + setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), datum.numObservations); + setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex).getFirst(), datum.numMismatches); + rowIndex++; } result.add(reportTable); } + return result; } - public static void outputRecalibrationReport(RecalibrationArgumentCollection RAC, QuantizationInfo quantizationInfo, Map> keysAndTablesMap, PrintStream outputFile) { - outputRecalibrationReport(RAC.generateReportTable(), quantizationInfo.generateReportTable(), generateReportTables(keysAndTablesMap), outputFile); + private static String parseCovariateName(final Covariate covariate) { + return covariate.getClass().getSimpleName().split("Covariate")[0]; } - public static void outputRecalibrationReport(GATKReportTable argumentTable, QuantizationInfo quantizationInfo, LinkedHashMap> keysAndTablesMap, PrintStream outputFile) { - outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(keysAndTablesMap), outputFile); + private static void setReportTableCell(final GATKReportTable reportTable, final int rowIndex, final String columnName, final Object value) { + reportTable.set(rowIndex, columnName, value.toString()); } - private static void outputRecalibrationReport(GATKReportTable argumentTable, GATKReportTable quantizationTable, List recalTables, PrintStream outputFile) { + public static void outputRecalibrationReport(final RecalibrationArgumentCollection RAC, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) { + outputRecalibrationReport(RAC.generateReportTable(), quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile); + } + + public static void outputRecalibrationReport(final GATKReportTable argumentTable, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) { + outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile); + } + + private static void outputRecalibrationReport(final GATKReportTable argumentTable, final GATKReportTable quantizationTable, final List recalTables, final PrintStream outputFile) { final GATKReport report = new GATKReport(); report.addTable(argumentTable); report.addTable(quantizationTable); @@ -340,108 +330,87 @@ public class RecalDataManager { } - public static void generateRecalibrationPlot(File filename, LinkedHashMap> original, boolean keepIntermediates) { + public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final Covariate[] requestedCovariates, final boolean keepIntermediates) { final Pair files = initializeRecalibrationPlot(filename); - writeCSV(files.getFirst(), original, "ORIGINAL", true); + writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, true); outputRecalibrationPlot(files, keepIntermediates); } - public static void generateRecalibrationPlot(File filename, LinkedHashMap> original, LinkedHashMap> recalibrated, boolean keepIntermediates) { + public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates, final boolean keepIntermediates) { final Pair files = initializeRecalibrationPlot(filename); - writeCSV(files.getFirst(), recalibrated, "RECALIBRATED", true); - writeCSV(files.getFirst(), original, "ORIGINAL", false); + writeCSV(files.getFirst(), recalibrated, "RECALIBRATED", requestedCovariates, true); + writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, false); outputRecalibrationPlot(files, keepIntermediates); } - private static void writeCSV(PrintStream deltaTableFile, LinkedHashMap> map, String recalibrationMode, boolean printHeader) { - final int QUALITY_SCORE_COVARIATE_INDEX = 1; - final Map deltaTable = new HashMap(); - BQSRKeyManager deltaKeyManager = null; + private static void writeCSV(final PrintStream deltaTableFile, final RecalibrationTables recalibrationTables, final String recalibrationMode, final Covariate[] requestedCovariates, final boolean printHeader) { + final NestedHashMap deltaTable = new NestedHashMap(); - - for (Map.Entry> tableEntry : map.entrySet()) { - final BQSRKeyManager keyManager = tableEntry.getKey(); - - if (keyManager.getNumOptionalCovariates() > 0) { // initialize with the 'all covariates' table - // create a key manager for the delta table - final List requiredCovariates = Arrays.asList(keyManager.getRequiredCovariates()[0]); // include the read group covariate as the only required covariate - final List optionalCovariates = new ArrayList(); - optionalCovariates.add(keyManager.getRequiredCovariates()[1]); // include the quality score covariate as an optional covariate - optionalCovariates.addAll(Arrays.asList(keyManager.getOptionalCovariates())); // include all optional covariates - deltaKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initialize the key manager - } + // add the quality score table to the delta table + final NestedHashMap qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); + for (final NestedHashMap.Leaf leaf : qualTable.getAllLeaves()) { // go through every element in the covariates table to create the delta table + final List newCovs = new ArrayList(4); + newCovs.add(leaf.keys.get(0)); + newCovs.add(requestedCovariates.length); // replace the covariate name with an arbitrary (unused) index for QualityScore + newCovs.add(leaf.keys.get(1)); + newCovs.add(leaf.keys.get(2)); + addToDeltaTable(deltaTable, newCovs.toArray(), (RecalDatum)leaf.value); // add this covariate to the delta table } - if (deltaKeyManager == null) - throw new ReviewedStingException ("Couldn't find the covariates table"); - - boolean readyToPrint = false; - for (Map.Entry> tableEntry : map.entrySet()) { - final BQSRKeyManager keyManager = tableEntry.getKey(); - - if (keyManager.getNumRequiredCovariates() == 2 && keyManager.getNumOptionalCovariates() == 0) { // look for the QualityScore table - final Map table = tableEntry.getValue(); - - // add the quality score table to the delta table - for (final Map.Entry entry : table.entrySet()) { // go through every element in the covariates table to create the delta table - final RecalDatum recalDatum = entry.getValue(); // the current element (recal datum) - - final List covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key - final List newCovs = new ArrayList(4); - newCovs.add(0, covs.get(0)); // replace the covariate value with the quality score - newCovs.add(1, covs.get(1)); - newCovs.add(2, "QualityScore"); // replace the covariate name with QualityScore (for the QualityScore covariate) - newCovs.add(3, covs.get(2)); - final long deltaKey = deltaKeyManager.longFromKey(newCovs.toArray()); // create a new bitset key for the delta table - addToDeltaTable(deltaTable, deltaKey, recalDatum); // add this covariate to the delta table - } - } - - else if (keyManager.getNumOptionalCovariates() > 0) { // look for the optional covariates table - final Map table = tableEntry.getValue(); - - // add the optional covariates to the delta table - for (final Map.Entry entry : table.entrySet()) { // go through every element in the covariates table to create the delta table - final RecalDatum recalDatum = entry.getValue(); // the current element (recal datum) - - final List covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key - covs.remove(QUALITY_SCORE_COVARIATE_INDEX); // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS) - final long deltaKey = deltaKeyManager.longFromKey(covs.toArray()); // create a new bitset key for the delta table - addToDeltaTable(deltaTable, deltaKey, recalDatum); // add this covariate to the delta table - } - readyToPrint = true; - } - - // output the csv file - if (readyToPrint) { - - if (printHeader) { - final List header = new LinkedList(); - header.add("ReadGroup"); - header.add("CovariateValue"); - header.add("CovariateName"); - header.add("EventType"); - header.add("Observations"); - header.add("Errors"); - header.add("EmpiricalQuality"); - header.add("AverageReportedQuality"); - header.add("Accuracy"); - header.add("Recalibration"); - deltaTableFile.println(Utils.join(",", header)); - } - - // print each data line - for (final Map.Entry deltaEntry : deltaTable.entrySet()) { - final List deltaKeys = deltaKeyManager.keySetFrom(deltaEntry.getKey()); - final RecalDatum deltaDatum = deltaEntry.getValue(); - deltaTableFile.print(Utils.join(",", deltaKeys)); - deltaTableFile.print("," + deltaDatum.stringForCSV()); - deltaTableFile.println("," + recalibrationMode); - } - - } - + // add the optional covariates to the delta table + final NestedHashMap covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLE); + for (final NestedHashMap.Leaf leaf : covTable.getAllLeaves()) { + final List covs = new ArrayList(leaf.keys); + covs.remove(1); // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS) + addToDeltaTable(deltaTable, covs.toArray(), (RecalDatum)leaf.value); // add this covariate to the delta table } + + // output the csv file + if (printHeader) { + final List header = new LinkedList(); + header.add("ReadGroup"); + header.add("CovariateValue"); + header.add("CovariateName"); + header.add("EventType"); + header.add("Observations"); + header.add("Errors"); + header.add("EmpiricalQuality"); + header.add("AverageReportedQuality"); + header.add("Accuracy"); + header.add("Recalibration"); + deltaTableFile.println(Utils.join(",", header)); + } + + final Map covariateNameMap = new HashMap(requestedCovariates.length); + for (final Covariate covariate : requestedCovariates) + covariateNameMap.put(covariate, parseCovariateName(covariate)); + + // print each data line + for (final NestedHashMap.Leaf leaf : deltaTable.getAllLeaves()) { + final List deltaKeys = generateValuesFromKeys(leaf.keys, requestedCovariates, covariateNameMap); + final RecalDatum deltaDatum = (RecalDatum)leaf.value; + deltaTableFile.print(Utils.join(",", deltaKeys)); + deltaTableFile.print("," + deltaDatum.stringForCSV()); + deltaTableFile.println("," + recalibrationMode); + } + } + + private static List generateValuesFromKeys(final List keys, final Covariate[] covariates, final Map covariateNameMap) { + final List values = new ArrayList(4); + values.add(covariates[0].formatKey((Integer)keys.get(0))); + + // TODO -- create static final variables to hold the indexes of the RG, qual, cov ID, etc. + + final int covariateIndex = (Integer)keys.get(1); + final Covariate covariate = covariateIndex == covariates.length ? covariates[1] : covariates[2 + covariateIndex]; + final int covariateKey = (Integer)keys.get(2); + values.add(covariate.formatKey(covariateKey)); + values.add(covariateNameMap.get(covariate)); + + final EventType event = EventType.eventFrom((Integer)keys.get(3)); + values.add(event); + + return values; } /** @@ -453,15 +422,14 @@ public class RecalDataManager { * @param deltaKey the key to the table * @param recalDatum the recal datum to combine with the accuracyDatum element in the table */ - private static void addToDeltaTable(Map deltaTable, Long deltaKey, RecalDatum recalDatum) { - final RecalDatum deltaDatum = deltaTable.get(deltaKey); // check if we already have a RecalDatum for this key + private static void addToDeltaTable(final NestedHashMap deltaTable, final Object[] deltaKey, final RecalDatum recalDatum) { + final RecalDatum deltaDatum = (RecalDatum)deltaTable.get(deltaKey); // check if we already have a RecalDatum for this key if (deltaDatum == null) - deltaTable.put(deltaKey, new RecalDatum(recalDatum)); // if we don't have a key yet, create a new one with the same values as the curent datum + deltaTable.put(new RecalDatum(recalDatum), deltaKey); // if we don't have a key yet, create a new one with the same values as the curent datum else deltaDatum.combine(recalDatum); // if we do have a datum, combine it with this one. } - /** * Section of code shared between the two recalibration walkers which uses the command line arguments to adjust attributes of the read such as quals or platform string * @@ -627,13 +595,13 @@ public class RecalDataManager { * * @param read The read for which to compute covariate values. * @param requestedCovariates The list of requested covariates. - * @param readCovariates The object to store the covariate values + * @param resultsStorage The object to store the covariate values */ - public static void computeCovariates(final GATKSAMRecord read, final Covariate[] requestedCovariates, final ReadCovariates readCovariates) { + public static void computeCovariates(final GATKSAMRecord read, final Covariate[] requestedCovariates, final ReadCovariates resultsStorage) { // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read for (int i = 0; i < requestedCovariates.length; i++) { - readCovariates.setCovariateIndex(i); - requestedCovariates[i].recordValues(read, readCovariates); + resultsStorage.setCovariateIndex(i); + requestedCovariates[i].recordValues(read, resultsStorage); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java index 3eb3a3981..b26912c31 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java @@ -113,8 +113,7 @@ public class RecalDatum extends Datum { return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported()); } - - private double calcExpectedErrors() { + private double calcExpectedErrors() { return (double) this.numObservations * qualToErrorProb(estimatedQReported); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java index 5af15c01c..a7088f4b6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java @@ -3,8 +3,9 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import java.io.File; import java.io.PrintStream; @@ -18,14 +19,19 @@ import java.util.*; */ public class RecalibrationReport { private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done) - private final LinkedHashMap> keysAndTablesMap; // quick access reference to the read group table and its key manager + private final RecalibrationTables recalibrationTables; // quick access reference to the tables private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation + private final HashMap optionalCovariateIndexes; private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter + private final Object[] tempRGarray = new Object[2]; + private final Object[] tempQUALarray = new Object[3]; + private final Object[] tempCOVarray = new Object[5]; + public RecalibrationReport(final File RECAL_FILE) { - GATKReport report = new GATKReport(RECAL_FILE); + final GATKReport report = new GATKReport(RECAL_FILE); argumentTable = report.getTable(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE); RAC = initializeArgumentCollectionTable(argumentTable); @@ -37,52 +43,39 @@ public class RecalibrationReport { ArrayList requiredCovariates = covariates.getFirst(); ArrayList optionalCovariates = covariates.getSecond(); requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()]; + optionalCovariateIndexes = new HashMap(optionalCovariates.size()); int covariateIndex = 0; for (final Covariate covariate : requiredCovariates) requestedCovariates[covariateIndex++] = covariate; - for (final Covariate covariate : optionalCovariates) - requestedCovariates[covariateIndex++] = covariate; + for (final Covariate covariate : optionalCovariates) { + requestedCovariates[covariateIndex] = covariate; + final String covariateName = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport + optionalCovariateIndexes.put(covariateName, covariateIndex-2); + covariateIndex++; + } for (Covariate cov : requestedCovariates) cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection - keysAndTablesMap = new LinkedHashMap>(); - ArrayList requiredCovariatesToAdd = new ArrayList(requiredCovariates.size()); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates. - ArrayList optionalCovariatesToAdd = new ArrayList(); // initialize an empty array of optional covariates to create the first few tables - for (Covariate covariate : requiredCovariates) { - requiredCovariatesToAdd.add(covariate); - final Map table; // initializing a new recal table for each required covariate (cumulatively) - final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager + final GATKReportTable rgReportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE); + final NestedHashMap rgTable = parseReadGroupTable(rgReportTable); - final int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES) - final String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check."; - if (nRequiredCovariates == 1) { // if there is only one required covariate, this is the read group table - final GATKReportTable reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE); - table = parseReadGroupTable(keyManager, reportTable); - } - else if (nRequiredCovariates == 2 && optionalCovariatesToAdd.isEmpty()) { // when we have both required covariates and no optional covariates we're at the QUAL table - final GATKReportTable reportTable = report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE); - table = parseQualityScoreTable(keyManager, reportTable); - } - else - throw new ReviewedStingException(UNRECOGNIZED_REPORT_TABLE_EXCEPTION); + final GATKReportTable qualReportTable = report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE); + final NestedHashMap qualTable = parseQualityScoreTable(qualReportTable); - keysAndTablesMap.put(keyManager, table); // adding the pair key+table to the map - } + final GATKReportTable covReportTable = report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE); + final NestedHashMap covTable = parseAllCovariatesTable(covReportTable); - - final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager - final GATKReportTable reportTable = report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE); - final Map table = parseAllCovariatesTable(keyManager, reportTable); - keysAndTablesMap.put(keyManager, table); + recalibrationTables = new RecalibrationTables(rgTable, qualTable, covTable); } - protected RecalibrationReport(final QuantizationInfo quantizationInfo, final LinkedHashMap> keysAndTablesMap, final GATKReportTable argumentTable, final RecalibrationArgumentCollection RAC) { + protected RecalibrationReport(final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final GATKReportTable argumentTable, final RecalibrationArgumentCollection RAC) { this.quantizationInfo = quantizationInfo; - this.keysAndTablesMap = keysAndTablesMap; + this.recalibrationTables = recalibrationTables; this.argumentTable = argumentTable; this.RAC = RAC; this.requestedCovariates = null; + this.optionalCovariateIndexes = null; } /** @@ -98,29 +91,20 @@ public class RecalibrationReport { * * @param other the recalibration report to combine with this one */ - public void combine(RecalibrationReport other) { - Iterator>> thisIterator = keysAndTablesMap.entrySet().iterator(); + public void combine(final RecalibrationReport other) { - for (Map.Entry> otherEntry : other.getKeysAndTablesMap().entrySet()) { - Map.Entry> thisEntry = thisIterator.next(); + for (RecalibrationTables.TableType type : RecalibrationTables.TableType.values()) { + final NestedHashMap myTable = recalibrationTables.getTable(type); + final NestedHashMap otherTable = other.recalibrationTables.getTable(type); - final Map thisTable = thisEntry.getValue(); - final BQSRKeyManager thisKeyManager = thisEntry.getKey(); - final BQSRKeyManager otherKeyManager = otherEntry.getKey(); + for (final NestedHashMap.Leaf row : otherTable.getAllLeaves()) { + final RecalDatum myDatum = (RecalDatum)myTable.get(row.keys); - for (Map.Entry otherTableEntry : otherEntry.getValue().entrySet()) { - final RecalDatum otherDatum = otherTableEntry.getValue(); - final Long otherBitKey = otherTableEntry.getKey(); - final List otherObjectKey = otherKeyManager.keySetFrom(otherBitKey); - - final long thisKey = thisKeyManager.longFromKey(otherObjectKey.toArray()); - final RecalDatum thisDatum = thisTable.get(thisKey); - - if (thisDatum == null) - thisTable.put(thisKey, otherDatum); + if (myDatum == null) + myTable.put(row.value, row.keys); else - thisDatum.combine(otherDatum); - } + myDatum.combine((RecalDatum)row.value); + } } } @@ -128,8 +112,8 @@ public class RecalibrationReport { return quantizationInfo; } - public LinkedHashMap> getKeysAndTablesMap() { - return keysAndTablesMap; + public RecalibrationTables getRecalibrationTables() { + return recalibrationTables; } public Covariate[] getRequestedCovariates() { @@ -139,82 +123,87 @@ public class RecalibrationReport { /** * Compiles the list of keys for the Covariates table and uses the shared parsing utility to produce the actual table * - * @param keyManager the key manager for this table * @param reportTable the GATKReport table containing data for this table * @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key. */ - private Map parseAllCovariatesTable(BQSRKeyManager keyManager, GATKReportTable reportTable) { - ArrayList columnNamesOrderedList = new ArrayList(5); - columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME); - columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME); - columnNamesOrderedList.add(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME); - columnNamesOrderedList.add(RecalDataManager.COVARIATE_NAME_COLUMN_NAME); - columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME); - return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList, false); + private NestedHashMap parseAllCovariatesTable(final GATKReportTable reportTable) { + final NestedHashMap result = new NestedHashMap(); + + for ( int i = 0; i < reportTable.getNumRows(); i++ ) { + final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME); + tempCOVarray[0] = requestedCovariates[0].keyFromValue(rg); + final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME); + tempCOVarray[1] = requestedCovariates[1].keyFromValue(qual); + final String covName = (String)reportTable.get(i, RecalDataManager.COVARIATE_NAME_COLUMN_NAME); + final int covIndex = optionalCovariateIndexes.get(covName); + tempCOVarray[2] = covIndex; + final Object covValue = reportTable.get(i, RecalDataManager.COVARIATE_VALUE_COLUMN_NAME); + tempCOVarray[3] = requestedCovariates[covIndex + 2].keyFromValue(covValue); + final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME)); + tempCOVarray[4] = event.index; + + result.put(getRecalDatum(reportTable, i, false), tempCOVarray); + } + + return result; } /** * * Compiles the list of keys for the QualityScore table and uses the shared parsing utility to produce the actual table - * @param keyManager the key manager for this table * @param reportTable the GATKReport table containing data for this table * @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key. */ - private Map parseQualityScoreTable(BQSRKeyManager keyManager, GATKReportTable reportTable) { - ArrayList columnNamesOrderedList = new ArrayList(3); - columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME); - columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME); - columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME); - return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList, false); + private NestedHashMap parseQualityScoreTable(final GATKReportTable reportTable) { + final NestedHashMap result = new NestedHashMap(); + + for ( int i = 0; i < reportTable.getNumRows(); i++ ) { + final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME); + tempQUALarray[0] = requestedCovariates[0].keyFromValue(rg); + final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME); + tempQUALarray[1] = requestedCovariates[1].keyFromValue(qual); + final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME)); + tempQUALarray[2] = event.index; + + result.put(getRecalDatum(reportTable, i, false), tempQUALarray); + } + + return result; } /** * Compiles the list of keys for the ReadGroup table and uses the shared parsing utility to produce the actual table * - * @param keyManager the key manager for this table * @param reportTable the GATKReport table containing data for this table * @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key. */ - private Map parseReadGroupTable(BQSRKeyManager keyManager, GATKReportTable reportTable) { - ArrayList columnNamesOrderedList = new ArrayList(2); - columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME); - columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME); - return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList, true); - } - - /** - * Shared parsing functionality for all tables. - * - * @param keyManager the key manager for this table - * @param reportTable the GATKReport table containing data for this table - * @param columnNamesOrderedList a list of columns to read from the report table and build as key for this particular table - * @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key. - */ - private Map genericRecalTableParsing(BQSRKeyManager keyManager, GATKReportTable reportTable, ArrayList columnNamesOrderedList, boolean hasEstimatedQReportedColumn) { - final Map result = new HashMap(reportTable.getNumRows()*2); + private NestedHashMap parseReadGroupTable(final GATKReportTable reportTable) { + final NestedHashMap result = new NestedHashMap(); for ( int i = 0; i < reportTable.getNumRows(); i++ ) { - final int nKeys = columnNamesOrderedList.size(); - final Object [] keySet = new Object[nKeys]; - for (int j = 0; j < nKeys; j++) - keySet[j] = reportTable.get(i, columnNamesOrderedList.get(j)); // all these objects are okay in String format, the key manager will handle them correctly (except for the event type (see below) - keySet[keySet.length-1] = EventType.eventFrom((String) keySet[keySet.length-1]); // the last key is always the event type. We convert the string ("M", "I" or "D") to an enum object (necessary for the key manager). - final long bitKey = keyManager.longFromKey(keySet); + final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME); + tempRGarray[0] = requestedCovariates[0].keyFromValue(rg); + final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME)); + tempRGarray[1] = event.index; - final long nObservations = (Long) reportTable.get(i, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME); - final long nErrors = (Long) reportTable.get(i, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME); - final double empiricalQuality = (Double) reportTable.get(i, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME); - - final double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table - (Double) reportTable.get(i, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table - Byte.parseByte((String) reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table - - final RecalDatum recalDatum = new RecalDatum(nObservations, nErrors, estimatedQReported, empiricalQuality); - result.put(bitKey, recalDatum); + result.put(getRecalDatum(reportTable, i, true), tempRGarray); } + return result; } + private RecalDatum getRecalDatum(final GATKReportTable reportTable, final int row, final boolean hasEstimatedQReportedColumn) { + final long nObservations = (Long) reportTable.get(row, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME); + final long nErrors = (Long) reportTable.get(row, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME); + final double empiricalQuality = (Double) reportTable.get(row, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME); + + final double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table + (Double) reportTable.get(row, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table + Byte.parseByte((String) reportTable.get(row, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table + + return new RecalDatum(nObservations, nErrors, estimatedQReported, empiricalQuality); + } + /** * Parses the quantization table from the GATK Report and turns it into a map of original => quantized quality scores * @@ -308,55 +297,21 @@ public class RecalibrationReport { * and quantization of the quality scores during every call of combine(). Very useful for the BQSRGatherer. */ public void calculateEmpiricalAndQuantizedQualities() { - for (Map table : keysAndTablesMap.values()) - for (RecalDatum datum : table.values()) - datum.calcCombinedEmpiricalQuality(); + for (RecalibrationTables.TableType type : RecalibrationTables.TableType.values()) { + final NestedHashMap table = recalibrationTables.getTable(type); + for (final Object value : table.getAllValues()) { + ((RecalDatum)value).calcCombinedEmpiricalQuality(); + } + } - quantizationInfo = new QuantizationInfo(keysAndTablesMap, RAC.QUANTIZING_LEVELS); + quantizationInfo = new QuantizationInfo(recalibrationTables, RAC.QUANTIZING_LEVELS); } public void output(PrintStream output) { - RecalDataManager.outputRecalibrationReport(argumentTable, quantizationInfo, keysAndTablesMap, output); + RecalDataManager.outputRecalibrationReport(argumentTable, quantizationInfo, recalibrationTables, requestedCovariates, output); } public RecalibrationArgumentCollection getRAC() { return RAC; } - - @Override - public boolean equals(Object o) { - if (!(o instanceof RecalibrationReport)) - return false; - RecalibrationReport other = (RecalibrationReport) o; - if (this == o) - return true; - return isEqualTable(this.keysAndTablesMap, other.keysAndTablesMap); - } - - private boolean isEqualTable(LinkedHashMap> t1, LinkedHashMap> t2) { - if (t1.size() != t2.size()) - return false; - - final Iterator>> t1Iterator = t1.entrySet().iterator(); - final Iterator>> t2Iterator = t2.entrySet().iterator(); - - while (t1Iterator.hasNext() && t2Iterator.hasNext()) { - Map.Entry> t1MapEntry = t1Iterator.next(); - Map.Entry> t2MapEntry = t2Iterator.next(); - - if (!(t1MapEntry.getKey().equals(t2MapEntry.getKey()))) - return false; - - final Map table2 = t2MapEntry.getValue(); - for (Map.Entry t1TableEntry : t1MapEntry.getValue().entrySet()) { - final Long t1Key = t1TableEntry.getKey(); - if (!table2.containsKey(t1Key)) - return false; - final RecalDatum t1Datum = t1TableEntry.getValue(); - if (!t1Datum.equals(table2.get(t1Key))) - return false; - } - } - return true; - } } diff --git a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java index 3871ca987..393dd5735 100644 --- a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java @@ -2,6 +2,8 @@ package org.broadinstitute.sting.utils; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import java.util.Arrays; + /** * BaseUtils contains some basic utilities for manipulating nucleotides. */ @@ -47,6 +49,20 @@ public class BaseUtils { public boolean sameBase(int i) { return index == i; } } + static private final int[] baseIndexMap = new int[256]; + static { + Arrays.fill(baseIndexMap, -1); + baseIndexMap['A'] = 0; + baseIndexMap['a'] = 0; + baseIndexMap['*'] = 0; // the wildcard character counts as an A + baseIndexMap['C'] = 1; + baseIndexMap['c'] = 1; + baseIndexMap['G'] = 2; + baseIndexMap['g'] = 2; + baseIndexMap['T'] = 3; + baseIndexMap['t'] = 3; + } + // todo -- fix me (enums?) public static final byte DELETION_INDEX = 4; public static final byte NO_CALL_INDEX = 5; // (this is 'N') @@ -182,27 +198,7 @@ public class BaseUtils { * @return 0, 1, 2, 3, or -1 if the base can't be understood */ static public int simpleBaseToBaseIndex(byte base) { - switch (base) { - case '*': // the wildcard character counts as an A - case 'A': - case 'a': - return 0; - - case 'C': - case 'c': - return 1; - - case 'G': - case 'g': - return 2; - - case 'T': - case 't': - return 3; - - default: - return -1; - } + return baseIndexMap[base]; } /** @@ -213,27 +209,7 @@ public class BaseUtils { */ @Deprecated static public int simpleBaseToBaseIndex(char base) { - switch (base) { - case '*': // the wildcard character counts as an A - case 'A': - case 'a': - return 0; - - case 'C': - case 'c': - return 1; - - case 'G': - case 'g': - return 2; - - case 'T': - case 't': - return 3; - - default: - return -1; - } + return baseIndexMap[base]; } static public int extendedBaseToBaseIndex(byte base) { @@ -284,11 +260,6 @@ public class BaseUtils { } } - @Deprecated - static public char baseIndexToSimpleBaseAsChar(int baseIndex) { - return (char) baseIndexToSimpleBase(baseIndex); - } - /** * Converts a base index to a base index representing its cross-talk partner * diff --git a/public/java/src/org/broadinstitute/sting/utils/collections/NestedHashMap.java b/public/java/src/org/broadinstitute/sting/utils/collections/NestedHashMap.java index 8652d3c28..6e79b7f24 100755 --- a/public/java/src/org/broadinstitute/sting/utils/collections/NestedHashMap.java +++ b/public/java/src/org/broadinstitute/sting/utils/collections/NestedHashMap.java @@ -25,7 +25,9 @@ package org.broadinstitute.sting.utils.collections; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; /** @@ -83,4 +85,53 @@ public class NestedHashMap { return value; // todo -- should never reach this point } + + public List getAllValues() { + List result = new ArrayList(); + fillAllValues(data, result); + return result; + } + + private void fillAllValues(final Map map, final List result) { + for ( Object value : map.values() ) { + if ( value == null ) + continue; + if ( value instanceof Map ) + fillAllValues((Map)value, result); + else + result.add(value); + } + } + + public static class Leaf { + public final List keys; + public final Object value; + + public Leaf(final List keys, final Object value) { + this.keys = keys; + this.value = value; + } + } + + public List getAllLeaves() { + List result = new ArrayList(); + List path = new ArrayList(); + fillAllLeaves(data, path, result); + return result; + } + + private void fillAllLeaves(final Map map, final List path, final List result) { + for ( final Object key : map.keySet() ) { + final Object value = map.get(key); + if ( value == null ) + continue; + final List newPath = new ArrayList(path); + newPath.add(key); + if ( value instanceof Map ) { + fillAllLeaves((Map) value, newPath, result); + } else { + result.add(new Leaf(newPath, value)); + } + } + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index 631d69858..3612693da 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -28,10 +28,10 @@ package org.broadinstitute.sting.utils.recalibration; import org.broadinstitute.sting.gatk.walkers.bqsr.*; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.File; -import java.util.*; /** * Utility methods to facilitate on-the-fly base quality score recalibration. @@ -45,39 +45,15 @@ public class BaseRecalibration { private final ReadCovariates readCovariates; private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done) - private final KeysAndTables keysAndTables; + private final RecalibrationTables recalibrationTables; private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation - static class KeysAndTables { + private final Object[] tempKeySet; - public enum Type { - READ_GROUP_TABLE(0), - QUALITY_SCORE_TABLE(1), - OPTIONAL_COVARIATE_TABLE(2); - - private final int index; - - private Type(int index) { - this.index = index; - } - } - - public final BQSRKeyManager[] managers = new BQSRKeyManager[Type.values().length]; - public final Map[] tables = new Map[Type.values().length]; - - public KeysAndTables(final Map> keysAndTablesMap) { - for (Map.Entry> mapEntry : keysAndTablesMap.entrySet()) { - Type type; - if (mapEntry.getKey().getNumRequiredCovariates() == 1) - type = Type.READ_GROUP_TABLE; - else if (mapEntry.getKey().getNumOptionalCovariates() == 0) - type = Type.QUALITY_SCORE_TABLE; - else - type = Type.OPTIONAL_COVARIATE_TABLE; - managers[type.index] = mapEntry.getKey(); - tables[type.index] = mapEntry.getValue(); - } - } + private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values. + static { + for (int i = 0; i < EventType.values().length; i++) + qualityScoreByFullCovariateKey[i] = new NestedHashMap(); } /** @@ -89,7 +65,7 @@ public class BaseRecalibration { public BaseRecalibration(final File RECAL_FILE, int quantizationLevels) { RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE); - keysAndTables = new KeysAndTables(recalibrationReport.getKeysAndTablesMap()); + recalibrationTables = recalibrationReport.getRecalibrationTables(); requestedCovariates = recalibrationReport.getRequestedCovariates(); quantizationInfo = recalibrationReport.getQuantizationInfo(); if (quantizationLevels == 0) // quantizationLevels == 0 means no quantization, preserve the quality scores @@ -98,20 +74,22 @@ public class BaseRecalibration { quantizationInfo.quantizeQualityScores(quantizationLevels); readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length); + tempKeySet = new Integer[requestedCovariates.length]; } /** * This constructor only exists for testing purposes. * * @param quantizationInfo the quantization info object - * @param keysAndTablesMap the map of key managers and recalibration tables + * @param recalibrationTables the map of key managers and recalibration tables * @param requestedCovariates the list of requested covariates */ - protected BaseRecalibration(final QuantizationInfo quantizationInfo, final LinkedHashMap> keysAndTablesMap, final Covariate[] requestedCovariates) { + protected BaseRecalibration(final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) { this.quantizationInfo = quantizationInfo; - keysAndTables = new KeysAndTables(keysAndTablesMap); + this.recalibrationTables = recalibrationTables; this.requestedCovariates = requestedCovariates; readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length); + tempKeySet = new Integer[requestedCovariates.length]; } /** @@ -125,13 +103,20 @@ public class BaseRecalibration { RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings final byte[] quals = read.getBaseQualities(errorModel); + final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model + + final int readLength = read.getReadLength(); + for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read - for (int offset = 0; offset < read.getReadLength(); offset++) { // recalibrate all bases in the read final byte originalQualityScore = quals[offset]; if (originalQualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality) - final long[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model - final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base + final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model + Byte recalibratedQualityScore = (Byte) qualityScoreByFullCovariateKey[errorModel.index].get(wrapKeySet(keySet)); + if (recalibratedQualityScore == null) { + recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base + qualityScoreByFullCovariateKey[errorModel.index].put(recalibratedQualityScore, keySet); + } quals[offset] = recalibratedQualityScore; } } @@ -139,7 +124,11 @@ public class BaseRecalibration { } } - + private Object[] wrapKeySet(final int[] keySet) { + for (int i = 0; i < keySet.length; i++) + tempKeySet[i] = keySet[i]; + return tempKeySet; + } /** * Implements a serial recalibration of the reads using the combinational table. @@ -158,24 +147,23 @@ public class BaseRecalibration { * @param errorModel the event type * @return A recalibrated quality score as a byte */ - protected byte performSequentialQualityCalculation(final long[] key, final EventType errorModel) { + protected byte performSequentialQualityCalculation(final int[] key, final EventType errorModel) { - final double globalDeltaQ = calculateGlobalDeltaQ(keysAndTables.managers[KeysAndTables.Type.READ_GROUP_TABLE.index], keysAndTables.tables[KeysAndTables.Type.READ_GROUP_TABLE.index], key, errorModel); - final double deltaQReported = calculateDeltaQReported(keysAndTables.managers[KeysAndTables.Type.QUALITY_SCORE_TABLE.index], keysAndTables.tables[KeysAndTables.Type.QUALITY_SCORE_TABLE.index], key, errorModel, globalDeltaQ); - final double deltaQCovariates = calculateDeltaQCovariates(keysAndTables.managers[KeysAndTables.Type.OPTIONAL_COVARIATE_TABLE.index], keysAndTables.tables[KeysAndTables.Type.OPTIONAL_COVARIATE_TABLE.index], key, errorModel, globalDeltaQ, deltaQReported); + final byte qualFromRead = (byte)(long)key[1]; + final double globalDeltaQ = calculateGlobalDeltaQ(recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE), key, errorModel); + final double deltaQReported = calculateDeltaQReported(recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE), key, errorModel, globalDeltaQ, qualFromRead); + final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLE), key, errorModel, globalDeltaQ, deltaQReported, qualFromRead); - final byte qualFromRead = (byte)key[1]; double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality } - private double calculateGlobalDeltaQ(final BQSRKeyManager keyManager, final Map table, final long[] key, final EventType errorModel) { + private double calculateGlobalDeltaQ(final NestedHashMap table, final int[] key, final EventType errorModel) { double result = 0.0; - final long masterKey = keyManager.createMasterKey(key, errorModel, -1); - final RecalDatum empiricalQualRG = table.get(masterKey); + final RecalDatum empiricalQualRG = (RecalDatum)table.get(key[0], errorModel.index); if (empiricalQualRG != null) { final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality(); final double aggregrateQReported = empiricalQualRG.getEstimatedQReported(); @@ -185,32 +173,28 @@ public class BaseRecalibration { return result; } - private double calculateDeltaQReported(final BQSRKeyManager keyManager, final Map table, final long[] key, final EventType errorModel, final double globalDeltaQ) { + private double calculateDeltaQReported(final NestedHashMap table, final int[] key, final EventType errorModel, final double globalDeltaQ, final byte qualFromRead) { double result = 0.0; - final long masterKey = keyManager.createMasterKey(key, errorModel, -1); - final RecalDatum empiricalQualQS = table.get(masterKey); + final RecalDatum empiricalQualQS = (RecalDatum)table.get(key[0], key[1], errorModel.index); if (empiricalQualQS != null) { final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality(); - final byte qualFromRead = (byte)key[1]; result = deltaQReportedEmpirical - qualFromRead - globalDeltaQ; } return result; } - private double calculateDeltaQCovariates(final BQSRKeyManager keyManager, final Map table, final long[] key, final EventType errorModel, final double globalDeltaQ, final double deltaQReported) { + private double calculateDeltaQCovariates(final NestedHashMap table, final int[] key, final EventType errorModel, final double globalDeltaQ, final double deltaQReported, final byte qualFromRead) { double result = 0.0; - final int numOptionalCovariates = keyManager.getNumOptionalCovariates(); - for (int i = 0; i < numOptionalCovariates; i++) { - final long masterKey = keyManager.createMasterKey(key, errorModel, i); - if (masterKey < 0) + // for all optional covariates + for (int i = 2; i < requestedCovariates.length; i++) { + if (key[i] < 0) continue; - final RecalDatum empiricalQualCO = table.get(masterKey); + final RecalDatum empiricalQualCO = (RecalDatum)table.get(key[0], key[1], (i-2), key[i], errorModel.index); if (empiricalQualCO != null) { final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality(); - final byte qualFromRead = (byte)key[1]; result += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported)); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java new file mode 100644 index 000000000..aa77b5142 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.recalibration; + +import org.broadinstitute.sting.utils.collections.NestedHashMap; + +/** + * Utility class to facilitate on-the-fly base quality score recalibration. + * + * User: ebanks + * Date: 6/20/12 + */ + +public class RecalibrationTables { + + public enum TableType { + READ_GROUP_TABLE(0), + QUALITY_SCORE_TABLE(1), + OPTIONAL_COVARIATE_TABLE(2); + + private final int index; + + private TableType(final int index) { + this.index = index; + } + } + + private final NestedHashMap[] tables = new NestedHashMap[TableType.values().length]; + + public RecalibrationTables(final NestedHashMap rgMap, final NestedHashMap qualMap, final NestedHashMap covMap) { + tables[TableType.READ_GROUP_TABLE.index] = rgMap; + tables[TableType.QUALITY_SCORE_TABLE.index] = qualMap; + tables[TableType.OPTIONAL_COVARIATE_TABLE.index] = covMap; + } + + public NestedHashMap getTable(final TableType type) { + return tables[type.index]; + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManagerUnitTest.java deleted file mode 100644 index da1678d54..000000000 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManagerUnitTest.java +++ /dev/null @@ -1,158 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.bqsr; - -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.testng.Assert; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.Test; - -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; - -/** - * @author Mauricio Carneiro - * @since 3/7/12 - */ -public class BQSRKeyManagerUnitTest { - RecalibrationArgumentCollection RAC; - - @BeforeClass - public void init() { - RAC = new RecalibrationArgumentCollection(); - } - - @Test(enabled = false) - public void testCombineBitSets() { - final int nRequired = 2; - final ArrayList covariates = new ArrayList(); - covariates.add(new ReadGroupCovariate()); - covariates.add(new QualityScoreCovariate()); - covariates.add(new CycleCovariate()); - covariates.add(new ContextCovariate()); - createReadAndTest(covariates, nRequired); - } - - @Test(enabled = true) - public void testOnlyRequiredCovariates() { - final int nRequired = 2; - final ArrayList covariates = new ArrayList(2); - covariates.add(new ReadGroupCovariate()); - covariates.add(new QualityScoreCovariate()); - createReadAndTest(covariates, nRequired); - } - - @Test(enabled = true) - public void testOnlyOneCovariate() { - final int nRequired = 1; - final ArrayList covariates = new ArrayList(2); - covariates.add(new ReadGroupCovariate()); - createReadAndTest(covariates, nRequired); - } - - @Test(enabled = false) - public void testOneCovariateWithOptionalCovariates() { - final int nRequired = 1; - final ArrayList covariates = new ArrayList(4); - covariates.add(new ReadGroupCovariate()); - covariates.add(new QualityScoreCovariate()); - covariates.add(new CycleCovariate()); - covariates.add(new ContextCovariate()); - createReadAndTest(covariates, nRequired); - } - - private void createReadAndTest(List covariates, int nRequired) { - int readLength = 1000; - GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(ReadUtils.createRandomReadBases(readLength, true), ReadUtils.createRandomReadQuals(readLength), readLength + "M"); - read.setReadGroup(new GATKSAMReadGroupRecord("ID")); - read.getReadGroup().setPlatform("illumina"); - - runTestOnRead(read, covariates, nRequired); - read.setReadNegativeStrandFlag(true); - runTestOnRead(read, covariates, nRequired); - read.setReadPairedFlag(true); - read.setSecondOfPairFlag(true); - runTestOnRead(read, covariates, nRequired); - read.setReadNegativeStrandFlag(false); - runTestOnRead(read, covariates, nRequired); - } - - private void runTestOnRead(GATKSAMRecord read, List covariateList, int nRequired) { - final long[][][] covariateKeys = new long[covariateList.size()][EventType.values().length][read.getReadLength()]; - ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), covariateList.size()); - for (int i = 0; i < covariateList.size(); i++) { - final Covariate cov = covariateList.get(i); - cov.initialize(RAC); - readCovariates.setCovariateIndex(i); - cov.recordValues(read, readCovariates); - } - for (int i = 0; i < read.getReadLength(); i++) { - for (EventType eventType : EventType.values()) { - final long[] vals = readCovariates.getKeySet(i, eventType); - for (int j = 0; j < vals.length; j++) - covariateKeys[j][eventType.index][i] = vals[j]; - } - } - - List requiredCovariates = new LinkedList(); - List optionalCovariates = new LinkedList(); - - for (int j=0; j optionalCovariates, - final Object[] expectedRequired, final Object[] expectedCovariate, final EventType eventType, final int index) { - - Object[] actual = keyManager.keySetFrom(key).toArray(); - - // Build the expected array - Object[] expected = new Object[nRequired + (optionalCovariates.size() > 0 ? 3 : 1)]; - System.arraycopy(expectedRequired, 0, expected, 0, nRequired); - if (optionalCovariates.size() > 0) { - expected[expected.length-3] = expectedCovariate[index]; - expected[expected.length-2] = optionalCovariates.get(index).getClass().getSimpleName().split("Covariate")[0]; - } - expected[expected.length-1] = eventType; - -// System.out.println("Actual : " + Utils.join(",", Arrays.asList(actual))); -// System.out.println("Expected: " + Utils.join(",", Arrays.asList(expected))); -// System.out.println(); - - for (int k = 0; k < expected.length; k++) - Assert.assertEquals(actual[k], expected[k]); - } -} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java index ee5395454..553b7e237 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java @@ -36,7 +36,7 @@ public class ContextCovariateUnitTest { verifyCovariateArray(readCovariates.getDeletionsKeySet(), RAC.INDELS_CONTEXT_SIZE, clippedRead, covariate); } - public static void verifyCovariateArray(long[][] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) { + public static void verifyCovariateArray(int[][] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) { for (int i = 0; i < values.length; i++) Assert.assertEquals(contextCovariate.formatKey(values[i][0]), expectedContext(read, i, contextSize)); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java index 79b57fd8f..3fa1e916d 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java @@ -47,7 +47,7 @@ public class CycleCovariateUnitTest { verifyCovariateArray(readCovariates.getMismatchesKeySet(), -1, -1); } - private void verifyCovariateArray(long[][] values, int init, int increment) { + private void verifyCovariateArray(int[][] values, int init, int increment) { for (short i = 0; i < values.length; i++) { short actual = Short.decode(covariate.formatKey(values[i][0])); int expected = init + (increment * i); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java index 4970413e8..a83508353 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariateUnitTest.java @@ -46,8 +46,8 @@ public class ReadGroupCovariateUnitTest { } - private void verifyCovariateArray(long[][] values, String expected) { - for (long[] value : values) { + private void verifyCovariateArray(int[][] values, String expected) { + for (int[] value : values) { String actual = covariate.formatKey(value[0]); Assert.assertEquals(actual, expected); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java index e5fde0efc..d1f2d6342 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java @@ -1,7 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -32,7 +34,6 @@ public class RecalibrationReportUnitTest { final QuantizationInfo quantizationInfo = new QuantizationInfo(quals, counts); final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); - final LinkedHashMap> keysAndTablesMap = new LinkedHashMap>(); quantizationInfo.noQuantization(); final List requiredCovariates = new LinkedList(); @@ -41,14 +42,10 @@ public class RecalibrationReportUnitTest { final ReadGroupCovariate rgCovariate = new ReadGroupCovariate(); rgCovariate.initialize(RAC); requiredCovariates.add(rgCovariate); - final BQSRKeyManager rgKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); - keysAndTablesMap.put(rgKeyManager, new HashMap()); final QualityScoreCovariate qsCovariate = new QualityScoreCovariate(); qsCovariate.initialize(RAC); requiredCovariates.add(qsCovariate); - final BQSRKeyManager qsKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); - keysAndTablesMap.put(qsKeyManager, new HashMap()); final ContextCovariate cxCovariate = new ContextCovariate(); cxCovariate.initialize(RAC); @@ -56,8 +53,6 @@ public class RecalibrationReportUnitTest { final CycleCovariate cyCovariate = new CycleCovariate(); cyCovariate.initialize(RAC); optionalCovariates.add(cyCovariate); - BQSRKeyManager cvKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); - keysAndTablesMap.put(cvKeyManager, new HashMap()); final Covariate[] requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()]; int covariateIndex = 0; @@ -75,34 +70,35 @@ public class RecalibrationReportUnitTest { readQuals[i] = 20; read.setBaseQualities(readQuals); - final int expectedKeys = expectedNumberOfKeys(4, length, RAC.INDELS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE); int nKeys = 0; // keep track of how many keys were produced final ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates); - for (int offset = 0; offset < length; offset++) { - for (Map.Entry> entry : keysAndTablesMap.entrySet()) { - BQSRKeyManager keyManager = entry.getKey(); - Map table = entry.getValue(); - final int numOptionalCovariates = keyManager.getNumOptionalCovariates(); - if (numOptionalCovariates == 0) { - table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_SUBSTITUTION, -1), RecalDatum.createRandomRecalDatum(10000, 10)); - table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_INSERTION, -1), RecalDatum.createRandomRecalDatum(100000, 10)); - table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_DELETION, -1), RecalDatum.createRandomRecalDatum(100000, 10)); - nKeys += 3; - } else { - for (int j = 0; j < numOptionalCovariates; j++) { - table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_SUBSTITUTION, j), RecalDatum.createRandomRecalDatum(10000, 10)); - table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_INSERTION, j), RecalDatum.createRandomRecalDatum(100000, 10)); - table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_DELETION, j), RecalDatum.createRandomRecalDatum(100000, 10)); - nKeys += 3; - } + final NestedHashMap rgTable = new NestedHashMap(); + final NestedHashMap qualTable = new NestedHashMap(); + final NestedHashMap covTable = new NestedHashMap(); + + for (int offset = 0; offset < length; offset++) { + + for (EventType errorMode : EventType.values()) { + + final int[] covariates = rc.getKeySet(offset, errorMode); + final int randomMax = errorMode == EventType.BASE_SUBSTITUTION ? 10000 : 100000; + + rgTable.put(RecalDatum.createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.index); + qualTable.put(RecalDatum.createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.index); + nKeys += 2; + for (int j = 0; j < optionalCovariates.size(); j++) { + covTable.put(RecalDatum.createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], j, covariates[2 + j], errorMode.index); + nKeys++; } } } Assert.assertEquals(nKeys, expectedKeys); - RecalibrationReport report = new RecalibrationReport(quantizationInfo, keysAndTablesMap, RAC.generateReportTable(), RAC); + final RecalibrationTables recalibrationTables = new RecalibrationTables(rgTable, qualTable, covTable); + + final RecalibrationReport report = new RecalibrationReport(quantizationInfo, recalibrationTables, RAC.generateReportTable(), RAC); File output = new File("RecalibrationReportUnitTestOutuput.grp"); PrintStream out; diff --git a/public/java/test/org/broadinstitute/sting/utils/BitSetUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/BitSetUtilsUnitTest.java index fd53283b1..32fe7597d 100644 --- a/public/java/test/org/broadinstitute/sting/utils/BitSetUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/BitSetUtilsUnitTest.java @@ -1,8 +1,6 @@ package org.broadinstitute.sting.utils; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.walkers.bqsr.BQSRKeyManager; -import org.broadinstitute.sting.gatk.walkers.bqsr.ContextCovariate; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -59,19 +57,4 @@ public class BitSetUtilsUnitTest { //for (String d : dna) // Assert.assertEquals(BitSetUtils.dnaFrom(BitSetUtils.bitSetFrom(d)), d); } - - @Test(enabled = true) - public void testNumberOfBitsToRepresent() { - Assert.assertEquals(BQSRKeyManager.numberOfBitsToRepresent(0), 0); // Make sure 0 elements need 0 bits to be represented - Assert.assertEquals(BQSRKeyManager.numberOfBitsToRepresent(1), 1); // Make sure 1 element needs 1 bit to be represented - Assert.assertEquals(BQSRKeyManager.numberOfBitsToRepresent(3), 2); // Make sure 3 elements need 2 bit to be represented - - for (int i = 1; i < 63; i++) { // Can't test i == 63 because n1 is a negative number - long n1 = 1L << i; - long n2 = Math.abs(random.nextLong()) % n1; - long n3 = n1 | n2; - Assert.assertEquals(BQSRKeyManager.numberOfBitsToRepresent(n3), (n3 == n1) ? i : i + 1); - Assert.assertEquals(BQSRKeyManager.numberOfBitsToRepresent(n1), i); - } - } } diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java index f70466d4f..982ac03bd 100644 --- a/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/BaseRecalibrationUnitTest.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.recalibration; import org.broadinstitute.sting.gatk.walkers.bqsr.*; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -22,7 +21,7 @@ import java.util.*; public class BaseRecalibrationUnitTest { private org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager dataManager; - private LinkedHashMap> keysAndTablesMap; + private RecalibrationTables recalibrationTables; private ReadGroupCovariate rgCovariate; private QualityScoreCovariate qsCovariate; @@ -50,19 +49,14 @@ public class BaseRecalibrationUnitTest { List optionalCovariates = new ArrayList(); dataManager = new org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager(true, 4); - keysAndTablesMap = new LinkedHashMap>(); rgCovariate = new ReadGroupCovariate(); rgCovariate.initialize(RAC); requiredCovariates.add(rgCovariate); - BQSRKeyManager rgKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); - keysAndTablesMap.put(rgKeyManager, new HashMap()); qsCovariate = new QualityScoreCovariate(); qsCovariate.initialize(RAC); requiredCovariates.add(qsCovariate); - BQSRKeyManager qsKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); - keysAndTablesMap.put(qsKeyManager, new HashMap()); cxCovariate = new ContextCovariate(); cxCovariate.initialize(RAC); @@ -70,8 +64,6 @@ public class BaseRecalibrationUnitTest { cyCovariate = new CycleCovariate(); cyCovariate.initialize(RAC); optionalCovariates.add(cyCovariate); - BQSRKeyManager cvKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); - keysAndTablesMap.put(cvKeyManager, new HashMap()); final Covariate[] requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()]; int covariateIndex = 0; @@ -82,10 +74,13 @@ public class BaseRecalibrationUnitTest { readCovariates = RecalDataManager.computeCovariates(read, requestedCovariates); - for (int i=0; i> mapEntry : keysAndTablesMap.entrySet()) { - final BQSRKeyManager keyManager = mapEntry.getKey(); - final int numOptionalCovariates = keyManager.getNumOptionalCovariates(); - if (numOptionalCovariates == 0) { - final long masterKey = keyManager.createMasterKey(bitKeys, EventType.BASE_SUBSTITUTION, -1); - updateCovariateWithKeySet(mapEntry.getValue(), masterKey, newDatum); - } else { - for (int j = 0; j < numOptionalCovariates; j++) { - final long masterKey = keyManager.createMasterKey(bitKeys, EventType.BASE_SUBSTITUTION, j); - updateCovariateWithKeySet(mapEntry.getValue(), masterKey, newDatum); - } - } + + rgTable.put(newDatum, bitKeys[0], EventType.BASE_SUBSTITUTION.index); + qualTable.put(newDatum, bitKeys[0], bitKeys[1], EventType.BASE_SUBSTITUTION.index); + for (int j = 0; j < optionalCovariates.size(); j++) { + covTable.put(newDatum, bitKeys[0], bitKeys[1], j, bitKeys[2 + j], EventType.BASE_SUBSTITUTION.index); } } - dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_RECALIBRATED_Q_SCORE); + + recalibrationTables = new RecalibrationTables(rgTable, qualTable, covTable); + + dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_RECALIBRATED_Q_SCORE); List quantizedQuals = new ArrayList(); List qualCounts = new ArrayList(); @@ -121,16 +112,15 @@ public class BaseRecalibrationUnitTest { } QuantizationInfo quantizationInfo = new QuantizationInfo(quantizedQuals, qualCounts); quantizationInfo.noQuantization(); - baseRecalibration = new BaseRecalibration(quantizationInfo, keysAndTablesMap, requestedCovariates); + baseRecalibration = new BaseRecalibration(quantizationInfo, recalibrationTables, requestedCovariates); } @Test(enabled=false) public void testGoldStandardComparison() { - debugTables(); for (int i = 0; i < read.getReadLength(); i++) { - long [] bitKey = readCovariates.getKeySet(i, EventType.BASE_SUBSTITUTION); + int [] bitKey = readCovariates.getKeySet(i, EventType.BASE_SUBSTITUTION); Object [] objKey = buildObjectKey(bitKey); byte v2 = baseRecalibration.performSequentialQualityCalculation(bitKey, EventType.BASE_SUBSTITUTION); byte v1 = goldStandardSequentialCalculation(objKey); @@ -138,7 +128,7 @@ public class BaseRecalibrationUnitTest { } } - private Object[] buildObjectKey(long[] bitKey) { + private Object[] buildObjectKey(final int[] bitKey) { Object[] key = new Object[bitKey.length]; key[0] = rgCovariate.formatKey(bitKey[0]); key[1] = qsCovariate.formatKey(bitKey[1]); @@ -147,49 +137,6 @@ public class BaseRecalibrationUnitTest { return key; } - private void debugTables() { - System.out.println("\nV1 Table\n"); - System.out.println("ReadGroup Table:"); - NestedHashMap nestedTable = dataManager.getCollapsedTable(0); - printNestedHashMap(nestedTable.data, ""); - System.out.println("\nQualityScore Table:"); - nestedTable = dataManager.getCollapsedTable(1); - printNestedHashMap(nestedTable.data, ""); - System.out.println("\nCovariates Table:"); - nestedTable = dataManager.getCollapsedTable(2); - printNestedHashMap(nestedTable.data, ""); - nestedTable = dataManager.getCollapsedTable(3); - printNestedHashMap(nestedTable.data, ""); - - - int i = 0; - System.out.println("\nV2 Table\n"); - for (Map.Entry> mapEntry : keysAndTablesMap.entrySet()) { - BQSRKeyManager keyManager = mapEntry.getKey(); - Map table = mapEntry.getValue(); - switch(i++) { - case 0 : - System.out.println("ReadGroup Table:"); - break; - case 1 : - System.out.println("QualityScore Table:"); - break; - case 2 : - System.out.println("Covariates Table:"); - break; - } - for (Map.Entry entry : table.entrySet()) { - Long key = entry.getKey(); - RecalDatum datum = entry.getValue(); - List keySet = keyManager.keySetFrom(key); - System.out.println(String.format("%s => %s", Utils.join(",", keySet), datum) + "," + datum.getEstimatedQReported()); - } - System.out.println(); - } - - - } - private static void printNestedHashMap(Map table, String output) { for (Object key : table.keySet()) { String ret;