diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java index 728d1a8d4..52d748731 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; -import org.broadinstitute.sting.utils.BitSetUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.*; @@ -33,20 +34,21 @@ public class BQSRKeyManager { private final Map covariateNameToIDMap; private int nRequiredBits; // Number of bits used to represent the required covariates - private int nOptionalBits; // Number of bits used to represent the standard covaraites - private final int nOptionalIDBits; // Number of bits used to represent the optional covariates IDs - private final int totalNumberOfBits; // Sum of all of the above plus the event bits - - private final BitSet optionalCovariateMask; // Standard mask for optional covariates bitset - private final BitSet optionalCovariateIDMask; // Standard mask for optional covariates order bitset - + + private final int optionalCovariateOffset; + private final int optionalCovariateIDOffset; + + private final long optionalCovariateMask; // Standard mask for optional covariates key + private final long optionalCovariateIDMask; // Standard mask for optional covariates order key + private final long eventIDMask; // Standard mask for event ID + /** * Initializes the KeyManager with the total number of covariates to use * * @param requiredCovariates the ordered list of required covariates * @param optionalCovariates the ordered list of optional covariates */ - public BQSRKeyManager(List requiredCovariates, List optionalCovariates) { + public BQSRKeyManager(final List requiredCovariates, final List optionalCovariates) { this.requiredCovariates = new ArrayList(requiredCovariates); this.optionalCovariates = new ArrayList(optionalCovariates); requiredCovariatesInfo = new ArrayList(requiredCovariates.size()); // initialize the required covariates list @@ -54,29 +56,35 @@ public class BQSRKeyManager { covariateNameToIDMap = new HashMap(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates) nRequiredBits = 0; - for (Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management - int nBits = required.numberOfBits(); // number of bits used by this covariate - BitSet mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate - requiredCovariatesInfo.add(new RequiredCovariateInfo(nRequiredBits, mask, required)); // Create an object for this required covariate + for (final Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management + final int nBits = required.numberOfBits(); // number of bits used by this covariate + final long mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate + requiredCovariatesInfo.add(new RequiredCovariateInfo(nBits, nRequiredBits, mask, required)); // Create an object for this required covariate nRequiredBits += nBits; } + final int bitsInEventType = numberOfBitsToRepresent(EventType.values().length); + eventIDMask = genericMask(nRequiredBits, bitsInEventType); + short id = 0; - nOptionalBits = 0; - for (Covariate optional : optionalCovariates) { - int nBits = optional.numberOfBits(); // number of bits used by this covariate - nOptionalBits = Math.max(nOptionalBits, nBits); // optional covariates are represented by the number of bits needed by biggest covariate - BitSet optionalID = bitSetFromId(id); // calculate the optional covariate ID for this covariate - optionalCovariatesInfo.add(new OptionalCovariateInfo(optionalID, optional)); // optional covariates have standardized mask and number of bits, so no need to store in the RequiredCovariateInfo object - String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport + int nOptionalBits = 0; + for (final Covariate optional : optionalCovariates) { + nOptionalBits = Math.max(nOptionalBits, optional.numberOfBits()); // optional covariates are represented by the number of bits needed by biggest covariate + optionalCovariatesInfo.add(new OptionalCovariateInfo(id, optional)); + final String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport covariateNameToIDMap.put(covariateName, id); id++; } - nOptionalIDBits = BitSetUtils.numberOfBitsToRepresent(optionalCovariates.size()); // number of bits used to represent the covariate ID - optionalCovariateMask = genericMask(nRequiredBits, nOptionalBits); // the generic mask to extract optional covariate bits from the combined bitset - optionalCovariateIDMask = genericMask(nRequiredBits + nOptionalBits, nOptionalIDBits); // the generic mask to extract optional covariate ID bits from the combined bitset - totalNumberOfBits = nRequiredBits + nOptionalBits + nOptionalIDBits + bitsInEventType(); // total number of bits used in the final key + optionalCovariateOffset = nRequiredBits + bitsInEventType; + optionalCovariateMask = genericMask(optionalCovariateOffset, nOptionalBits); // the generic mask to extract optional covariate bits from the combined bitset + optionalCovariateIDOffset = nRequiredBits + bitsInEventType + nOptionalBits; + final int nOptionalIDBits = numberOfBitsToRepresent(optionalCovariates.size()); // number of bits used to represent the covariate ID + optionalCovariateIDMask = genericMask(optionalCovariateIDOffset, nOptionalIDBits); // the generic mask to extract optional covariate ID bits from the combined bitset + + final int totalNumberOfBits = optionalCovariateIDOffset + nOptionalIDBits; // total number of bits used in the final key + if ( totalNumberOfBits > 64 ) + throw new UserException.BadInput("The total number of bits used for the master BQSR key is greater than 64 and cannot be represented in a Long"); } /** @@ -93,80 +101,78 @@ public class BQSRKeyManager { * * Note: If there are no optional covariates, only one bitset key will be returned with all the required covariates and the event type * - * @param allKeys The keys in bitset representation for each covariate + * @param allKeys The keys in long representation for each covariate * @param eventType The type of event described by this keyset (e.g. mismatches, insertions, deletions) - * @return one key in bitset representation per covariate + * @return one key in long representation per covariate */ - public List bitSetsFromAllKeys(BitSet[] allKeys, EventType eventType) { - List allBitSets = new ArrayList(); // Generate one key per optional covariate - - BitSet eventBitSet = bitSetFromEvent(eventType); // create a bitset with the event type - int eventTypeBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits; // Location in the bit set to add the event type bits + public List longsFromAllKeys(Long[] allKeys, EventType eventType) { + final List allFinalKeys = new ArrayList(); // Generate one key per optional covariate int covariateIndex = 0; - BitSet requiredKey = new BitSet(nRequiredBits); // This will be a bitset holding all the required keys, to replicate later on + long masterKey = 0L; // This will be a master key holding all the required keys, to replicate later on for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo) - addBitSetToKeyAtLocation(requiredKey, allKeys[covariateIndex++], infoRequired.bitsBefore); // Add all the required covariates to the key set + masterKey |= (allKeys[covariateIndex++] << infoRequired.offset); + + final long eventKey = keyFromEvent(eventType); // create a key for the event type + masterKey |= (eventKey << nRequiredBits); for (OptionalCovariateInfo infoOptional : optionalCovariatesInfo) { - BitSet covariateKey = allKeys[covariateIndex++]; // get the bitset from all keys + final Long covariateKey = allKeys[covariateIndex++]; if (covariateKey == null) continue; // do not add nulls to the final set of keys. - BitSet optionalKey = new BitSet(totalNumberOfBits); // create a new key for this optional covariate - optionalKey.or(requiredKey); // import all the required covariates - addBitSetToKeyAtLocation(optionalKey, covariateKey, nRequiredBits); // add the optional covariate right after the required covariates - addBitSetToKeyAtLocation(optionalKey, infoOptional.covariateID, nRequiredBits + nOptionalBits); // add the optional covariate ID right after the optional covarite - addBitSetToKeyAtLocation(optionalKey, eventBitSet, eventTypeBitIndex); // Add the event type - allBitSets.add(optionalKey); // add this key to the list of keys + long newKey = masterKey | (covariateKey << optionalCovariateOffset); + newKey |= (infoOptional.covariateID << optionalCovariateIDOffset); + + if ( newKey < 0 ) + System.out.println("*** " + newKey); + + allFinalKeys.add(newKey); // add this key to the list of keys } - if (optionalCovariatesInfo.size() == 0) { // special case when we have no optional covariates, add the event type to the required key (our only key) - addBitSetToKeyAtLocation(requiredKey, eventBitSet, eventTypeBitIndex); // Add the event type - allBitSets.add(requiredKey); // add this key to the list of keys - } + if (optionalCovariatesInfo.size() == 0) // special case when we have no optional covariates + allFinalKeys.add(masterKey); - return allBitSets; + return allFinalKeys; } /** - * Generates one bitset key for the covariates represented in Object[] key + * Generates one key for the covariates represented in Object[] key * * The covariates will have the actual objects produced by the covariates (probably read from the recalibration data file) - * and will contain all required covariates and one (or none) optional covariates. Therefore, the product is one bitset key, not many. + * and will contain all required covariates and one (or none) optional covariates. Therefore, the product is one key, not many. * * Example key: * RG, QUAL, CYCLE, CYCLE_ID, EventType * * @param key list of objects produced by the required covariates followed by one or zero optional covariates. - * @return a bitset key representing these objects. Bitset encryption is done using the covariate's interface. + * @return a key representing these objects. */ - public BitSet bitSetFromKey(Object[] key) { - BitSet bitSetKey = new BitSet(totalNumberOfBits); - + public Long longFromKey(Object[] key) { int requiredCovariate = 0; - for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo) { - BitSet covariateBitSet = infoRequired.covariate.bitSetFromKey(key[requiredCovariate++]); // create a bitset from the object key provided using the required covariate's interface - addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, infoRequired.bitsBefore); // add it to the bitset key - } - - if (optionalCovariatesInfo.size() > 0) { - int optionalCovariate = requiredCovariatesInfo.size(); // the optional covariate index in the key array - int covariateIDIndex = optionalCovariate + 1; // the optional covariate ID index is right after the optional covariate's - int covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index - OptionalCovariateInfo infoOptional = optionalCovariatesInfo.get(covariateID); // so we can get the optional covariate information - - BitSet covariateBitSet = infoOptional.covariate.bitSetFromKey(key[optionalCovariate]); // convert the optional covariate key into a bitset using the covariate's interface - addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, nRequiredBits); // add the optional covariate right after the required covariates - addBitSetToKeyAtLocation(bitSetKey, infoOptional.covariateID, nRequiredBits + nOptionalBits); // add the optional covariate ID right after the optional covarite - } - - int eventIndex = key.length - 1; // the event type is always the last key - int eventTypeBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits; // location in the bit set to add the event type bits - BitSet eventBitSet = bitSetFromEvent((EventType) key[eventIndex]); // get the bit set representation of the event type - addBitSetToKeyAtLocation(bitSetKey, eventBitSet, eventTypeBitIndex); // add the event type + long masterKey = 0L; // This will be a master key holding all the required keys, to replicate later on + for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo) + masterKey |= (infoRequired.covariate.longFromKey(key[requiredCovariate++]) << infoRequired.offset); - return bitSetKey; + final int eventIndex = key.length - 1; // the event type is always the last key + final long eventKey = keyFromEvent((EventType) key[eventIndex]); // create a key for the event type + masterKey |= (eventKey << nRequiredBits); + + if (optionalCovariatesInfo.size() > 0) { + final int covariateIndex = requiredCovariatesInfo.size(); // the optional covariate index in the key array + final int covariateIDIndex = covariateIndex + 1; // the optional covariate ID index is right after the optional covariate's + final short covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index + final OptionalCovariateInfo infoOptional = optionalCovariatesInfo.get(covariateID); // so we can get the optional covariate information + + final long covariateKey = infoOptional.covariate.longFromKey(key[covariateIndex]); // convert the optional covariate key into a bitset using the covariate's interface + masterKey |= (covariateKey << optionalCovariateOffset); + masterKey |= (infoOptional.covariateID << optionalCovariateIDOffset); + } + + if ( masterKey < 0 ) + System.out.println("*** " + masterKey); + + return masterKey; } /** @@ -176,34 +182,34 @@ public class BQSRKeyManager { * @param id the string or short representation of the optional covariate id * @return the short representation of the optional covariate id. */ - private short parseCovariateID(Object id) { + private short parseCovariateID(final Object id) { return (id instanceof String) ? covariateNameToIDMap.get(id.toString()) : (Short) id; } /** - * Generates a key set of objects from a combined bitset key. + * Generates a key set of objects from a combined master key. * * Masks out each covariate independently and decodes their values (Object) into a keyset * - * @param key the bitset representation of the keys + * @param master the master representation of the keys * @return an object array with the values for each key */ - public List keySetFrom(BitSet key) { - List objectKeys = new ArrayList(); + public List keySetFrom(final Long master) { + final List objectKeys = new ArrayList(); for (RequiredCovariateInfo info : requiredCovariatesInfo) { - BitSet covariateBitSet = extractBitSetFromKey(key, info.mask, info.bitsBefore); // get the covariate's bitset - objectKeys.add(info.covariate.keyFromBitSet(covariateBitSet)); // convert the bitset to object using covariate's interface + final Long covariateKey = extractKeyFromMaster(master, info.mask, info.offset); // get the covariate's key + objectKeys.add(info.covariate.formatKey(covariateKey)); // convert the key to object using covariate's interface } if (optionalCovariatesInfo.size() > 0) { - BitSet covBitSet = extractBitSetFromKey(key, optionalCovariateMask, nRequiredBits); // mask out the covariate bit set - BitSet idbs = extractBitSetFromKey(key, optionalCovariateIDMask, nRequiredBits + nOptionalBits); // mask out the covariate order (to identify which covariate this is) - short id = BitSetUtils.shortFrom(idbs); // covert the id bitset into a short - Covariate covariate = optionalCovariatesInfo.get(id).covariate; // get the corresponding optional covariate object - objectKeys.add(covariate.keyFromBitSet(covBitSet)); // add the optional covariate to the key set + final Long covKey = extractKeyFromMaster(master, optionalCovariateMask, optionalCovariateOffset); // get the covariate's key + final int covIDKey = (int)extractKeyFromMaster(master, optionalCovariateIDMask, optionalCovariateIDOffset); // get the covariate's id (to identify which covariate this is) + Covariate covariate = optionalCovariatesInfo.get((short)covIDKey).covariate; // get the corresponding optional covariate object + objectKeys.add(covariate.formatKey(covKey)); // add the optional covariate key to the key set objectKeys.add(covariate.getClass().getSimpleName().split("Covariate")[0]); // add the covariate name using the id } - objectKeys.add(eventFromBitSet(key)); // add the event type object to the key set + + objectKeys.add(EventType.eventFrom((int)extractKeyFromMaster(master, eventIDMask, nRequiredBits))); // add the event type object to the key set return objectKeys; } @@ -217,77 +223,35 @@ public class BQSRKeyManager { } /** - * Translates a masked bitset into a bitset starting at 0 + * Creates a mask for the requested covariate to extract the relevant key from a combined master key * - * @param key the masked out bitset - * @param n the number of bits to chop - * @return a translated bitset starting at 0 for the covariate machinery to decode + * @param offset the offset into the master key + * @param nBits the number of bits needed by the Covariate to represent its values + * @return the mask relevant to the covariate */ - private BitSet chopNBitsFrom(BitSet key, int n) { - BitSet choppedKey = new BitSet(); - for (int i = key.nextSetBit(0); i >= 0; i = key.nextSetBit(i + 1)) - choppedKey.set(i - n); // Set every bit translocated to the beginning of the BitSet - return choppedKey; - } - - /** - * Creates a mask for the requested covariate to extract the relevant bitset from a combined bitset key - * - * @param leadingBits the index of the covariate in the ordered covariate list - * @param nBits the number of bits needed by the Covariate to represent its values in BitSet form - * @return the bitset relevant to the covariate - */ - - private BitSet genericMask(int leadingBits, int nBits) { - BitSet mask = new BitSet(leadingBits + nBits); - mask.set(leadingBits, leadingBits + nBits); + private long genericMask(final int offset, final int nBits) { + long mask = 0L; + for ( int i = 0; i < nBits; i++ ) + mask |= 1L << (offset+i); return mask; } - /** - * Decodes the event type (enum) from the full bitset key - * - * @param fullKey the full key of all covariates + event type - * @return the decoded event type. - */ - private EventType eventFromBitSet(BitSet fullKey) { - BitSet eventKey = new BitSet(); - int firstBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits; - for (int i = fullKey.nextSetBit(firstBitIndex); i >= 0; i = fullKey.nextSetBit(i + 1)) - eventKey.set(i - firstBitIndex); - return EventType.eventFrom(BitSetUtils.shortFrom(eventKey)); + private long extractKeyFromMaster(final long master, final long mask, final int offset) { + long key = master & mask; + return key >> offset; } - // cache the BitSet representing an event since it's otherwise created a massive amount of times - private static final Map eventTypeCache = new HashMap(EventType.values().length); + // cache the key representing an event since it's otherwise created a massive amount of times + private static final Map eventTypeCache = new HashMap(EventType.values().length); // event IDs must be longs so that bit-fiddling works static { for (final EventType eventType : EventType.values()) - eventTypeCache.put(eventType, BitSetUtils.bitSetFrom(eventType.index)); + eventTypeCache.put(eventType, (long)eventType.index); } - private BitSet bitSetFromEvent(final EventType eventType) { + private Long keyFromEvent(final EventType eventType) { return eventTypeCache.get(eventType); } - private BitSet bitSetFromId(final short id) { - return BitSetUtils.bitSetFrom(id); - } - - private int bitsInEventType() { - return BitSetUtils.numberOfBitsToRepresent(EventType.values().length); - } - - private void addBitSetToKeyAtLocation(BitSet key, BitSet bitSet, int location) { - for (int j = bitSet.nextSetBit(0); j >= 0; j = bitSet.nextSetBit(j + 1)) - key.set(j + location); // translate the bits set in the key to their corresponding position in the full key - } - - private BitSet extractBitSetFromKey (BitSet key, BitSet mask, int leadingBits) { - BitSet bitSet = (BitSet) key.clone(); - bitSet.and(mask); - return chopNBitsFrom(bitSet, leadingBits); - } - @Override public boolean equals(Object o) { if (!(o instanceof BQSRKeyManager)) @@ -322,27 +286,50 @@ public class BQSRKeyManager { return true; } + /** + * Calculates the number of bits necessary to represent a given number of elements + * + * @param numberOfElements the number of elements to represent (must be positive) + * @return the number of bits necessary to represent this many elements + */ + public static int numberOfBitsToRepresent(long numberOfElements) { + if (numberOfElements < 0) + throw new ReviewedStingException("Number of elements must be positive: " + numberOfElements); + + if (numberOfElements == 1L) + return 1; // special case + + int n = 0; + numberOfElements--; + while (numberOfElements > 0) { + numberOfElements = numberOfElements >> 1; + n++; + } + return n; + } /** * Aggregate information for each Covariate */ - class RequiredCovariateInfo { - public final int bitsBefore; // number of bits before this covariate in the combined bitset key - public final BitSet mask; // the mask to pull out this covariate from the combined bitset key ( a mask made from bitsBefore and nBits ) + private static class RequiredCovariateInfo { + public final int nBits; // number of bits for this key + public final int offset; // the offset into the master key + public final long mask; // the mask to pull out this covariate from the combined bitset key ( a mask made from bitsBefore and nBits ) public final Covariate covariate; // this allows reverse lookup of the Covariates in order - RequiredCovariateInfo(int bitsBefore, BitSet mask, Covariate covariate) { - this.bitsBefore = bitsBefore; + RequiredCovariateInfo(final int nBits, final int offset, final long mask, final Covariate covariate) { + this.nBits = nBits; + this.offset = offset; this.mask = mask; this.covariate = covariate; } } - class OptionalCovariateInfo { - public final BitSet covariateID; // cache the covariate ID + private static class OptionalCovariateInfo { + public final long covariateID; // cache the covariate ID (must be a long so that bit-fiddling works) public final Covariate covariate; - OptionalCovariateInfo(BitSet covariateID, Covariate covariate) { + OptionalCovariateInfo(final long covariateID, final Covariate covariate) { this.covariateID = covariateID; this.covariate = covariate; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java index e2663359b..94b2dfe50 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java @@ -26,14 +26,13 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.BitSetUtils; import org.broadinstitute.sting.utils.clipping.ClippingRepresentation; import org.broadinstitute.sting.utils.clipping.ReadClipper; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.Arrays; -import java.util.BitSet; /** * Created by IntelliJ IDEA. @@ -65,12 +64,12 @@ public class ContextCovariate implements StandardCovariate { @Override public CovariateValues getValues(final GATKSAMRecord read) { - int l = read.getReadLength(); - BitSet[] mismatches = new BitSet[l]; - BitSet[] insertions = new BitSet[l]; - BitSet[] deletions = new BitSet[l]; + final int l = read.getReadLength(); + final Long[] mismatches = new Long[l]; + final Long[] insertions = new Long[l]; + final Long[] deletions = new Long[l]; - GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context + final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag(); byte[] bases = clippedRead.getReadBases(); @@ -98,21 +97,21 @@ public class ContextCovariate implements StandardCovariate { } @Override - public String keyFromBitSet(BitSet key) { + public String formatKey(final Long key) { if (key == null) // this can only happen in test routines because we do not propagate null keys to the csv file return null; - return BitSetUtils.dnaFrom(key); + return contextFromKey(key); } @Override - public BitSet bitSetFromKey(Object key) { - return BitSetUtils.bitSetFrom((String) key); + public Long longFromKey(Object key) { + return longFrom((String) key); } @Override public int numberOfBits() { - return Long.bitCount(-1L); + return Integer.bitCount(1); } /** @@ -121,14 +120,14 @@ public class ContextCovariate implements StandardCovariate { * @param bases the bases in the read to build the context from * @param offset the position in the read to calculate the context for * @param contextSize context size to use building the context - * @return the bitSet representing the Context + * @return the key representing the context */ - private BitSet contextWith(byte[] bases, int offset, int contextSize) { - BitSet result = null; + private Long contextWith(byte[] bases, int offset, int contextSize) { + Long result = null; if (offset - contextSize + 1 >= 0) { final byte[] context = Arrays.copyOfRange(bases, offset - contextSize + 1, offset + 1); if (!BaseUtils.containsBase(context, BaseUtils.N)) - result = BitSetUtils.bitSetFrom(context); + result = keyFromContext(context); } return result; } @@ -146,4 +145,125 @@ public class ContextCovariate implements StandardCovariate { array[r] = temp; } } + + static final private int MAX_DNA_CONTEXT = 31; // the maximum context size (number of bases) permitted in the "long bitset" implementation of the DNA <=> BitSet conversion. + static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1]; // keeps the memoized table with the number of combinations for each given DNA context length + + public static Long longFrom(final String dna) { + return keyFromContext(dna.getBytes()); + } + + /** + * Creates a Long representation of a given dna string. + * + * Warning: This conversion is limited to long precision, therefore the dna sequence cannot + * be longer than 31 bases. + * + * The bit representation of a dna string is the simple: + * 0 A 4 AA 8 CA + * 1 C 5 AC ... + * 2 G 6 AG 1343 TTGGT + * 3 T 7 AT 1364 TTTTT + * + * To convert from dna to number, we convert the dna string to base10 and add all combinations that + * preceded the string (with smaller lengths). + * + * @param dna the dna sequence + * @return the key representing the dna sequence + */ + public static Long keyFromContext(final byte[] dna) { + if (dna.length > MAX_DNA_CONTEXT) + throw new ReviewedStingException(String.format("DNA Length cannot be bigger than %d. dna: %s (%d)", MAX_DNA_CONTEXT, dna, dna.length)); + + final long preContext = combinationsFor(dna.length - 1); // the sum of all combinations that preceded the length of the dna string + long baseTen = 0L; // the number in base_10 that we are going to use to generate the bit set + for (final byte base : dna) { + baseTen = baseTen << 2; // multiply by 4 + baseTen += (long)BaseUtils.simpleBaseToBaseIndex(base); + } + return baseTen + preContext; // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length. + } + + /** + * The sum of all combinations of a context of a given length from length = 0 to length. + * + * Memoized implementation of sum(4^i) , where i=[0,length] + * + * @param length the length of the DNA context + * @return the sum of all combinations leading up to this context length. + */ + private static long combinationsFor(int length) { + if (length > MAX_DNA_CONTEXT) + throw new ReviewedStingException(String.format("Context cannot be longer than %d bases but requested %d.", MAX_DNA_CONTEXT, length)); + + // only calculate the number of combinations if the table hasn't already cached the value + if (length > 0 && combinationsPerLength[length] == 0) { + long combinations = 0L; + for (int i = 1; i <= length; i++) + combinations += (1L << 2 * i); // add all combinations with 4^i ( 4^i is the same as 2^(2*i) ) + combinationsPerLength[length] = combinations; + } + return combinationsPerLength[length]; + } + + /** + * Converts a key into the dna string representation. + * + * Warning: This conversion is limited to long precision, therefore the dna sequence cannot + * be longer than 31 bases. + * + * We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the + * base_10 representation of the sequence. This is important for us to know how to bring the number + * to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented + * as 0's and leading 0's are omitted). + * + * quasi-canonical because A is represented by a 0, therefore, + * instead of : 0, 1, 2, 3, 10, 11, 12, ... + * we have : 0, 1, 2, 3, 00, 01, 02, ... + * + * but we can correctly decode it because we know the final length. + * + * @param key the key representing the dna sequence + * @return the dna sequence represented by the key + */ + public static String contextFromKey(Long key) { + if (key < 0) + throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?"); + + final int length = contextLengthFor(key); // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls) + key -= combinationsFor(length - 1); // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation + + StringBuilder dna = new StringBuilder(); + while (key > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical) + final byte base = (byte) (key & 3); // equivalent to (key % 4) + dna.append((char)BaseUtils.baseIndexToSimpleBase(base)); + key = key >> 2; // divide by 4 + } + for (int j = dna.length(); j < length; j++) + dna.append('A'); // add leading A's as necessary (due to the "quasi" canonical status, see description above) + + return dna.reverse().toString(); // make sure to reverse the string since we should have been pre-pending all along + } + + /** + * Calculates the length of the DNA context for a given base 10 number + * + * It is important to know the length given the base 10 number to calculate the number of combinations + * and to disambiguate the "quasi-canonical" state. + * + * This method also calculates the number of combinations as a by-product, but since it memoizes the + * results, a subsequent call to combinationsFor(length) is O(1). + * + * @param number the base 10 representation of the key + * @return the length of the DNA context represented by this number + */ + private static int contextLengthFor(final long number) { + int length = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet. + long combinations = combinationsFor(length); // the next context (we advance it so we know which one was preceding it). + while (combinations <= number) { // find the length of the dna string (length) + length++; + combinations = combinationsFor(length); // calculate the next context + } + return length; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java index 6b872a50c..814bbe507 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Covariate.java @@ -2,8 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import java.util.BitSet; - /* * Copyright (c) 2009 The Broad Institute * @@ -45,7 +43,7 @@ public interface Covariate { * * @param RAC the recalibration argument collection */ - public void initialize(RecalibrationArgumentCollection RAC); + public void initialize(final RecalibrationArgumentCollection RAC); /** * Calculates covariate values for all positions in the read. @@ -53,7 +51,7 @@ public interface Covariate { * @param read the read to calculate the covariates on. * @return all the covariate values for every base in the read. */ - public CovariateValues getValues(GATKSAMRecord read); + public CovariateValues getValues(final GATKSAMRecord read); /** * Used to get the covariate's value from input csv file during on-the-fly recalibration @@ -61,26 +59,26 @@ public interface Covariate { * @param str the key in string type (read from the csv) * @return the key in it's correct type. */ - public Object getValue(String str); + public Object getValue(final String str); /** - * Converts the bitset representation of the key (used internally for table indexing) to String format for file output. + * Converts the internal representation of the key to String format for file output. * - * @param key the bitset representation of the key + * @param key the Long representation of the key * @return a string representation of the key */ - public String keyFromBitSet(BitSet key); + public String formatKey(final Long key); /** - * Converts a key into a bitset + * Converts an Object key into a Long key using only the lowest numberOfBits() bits * - * Only necessary for on-the-fly recalibration when you have the object, but need to store it in memory in bitset format. For counting covariates - * the getValues method already returns all values in BitSet format. + * Only necessary for on-the-fly recalibration when you have the object, but need to store it in memory in long format. For counting covariates + * the getValues method already returns all values in Long format. * * @param key the object corresponding to the covariate - * @return a bitset representation of the object + * @return a long representation of the object */ - public BitSet bitSetFromKey(Object key); + public Long longFromKey(final Object key); /** * Each covariate should determine how many bits are necessary to encode it's data diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java index ebf90ebfd..960c51325 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CovariateValues.java @@ -1,7 +1,5 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; -import java.util.BitSet; - /** * An object to hold the different covariate values for all bases in the read. * @@ -14,25 +12,25 @@ import java.util.BitSet; * @since 2/8/12 */ public class CovariateValues { - private final BitSet[] mismatches; - private final BitSet[] insertions; - private final BitSet[] deletions; + private final Long[] mismatches; + private final Long[] insertions; + private final Long[] deletions; - public CovariateValues(BitSet[] mismatch, BitSet[] insertion, BitSet[] deletion) { + public CovariateValues(Long[] mismatch, Long[] insertion, Long[] deletion) { this.mismatches = mismatch; this.insertions = insertion; this.deletions = deletion; } - public BitSet[] getMismatches() { + public Long[] getMismatches() { return mismatches; } - public BitSet[] getInsertions() { + public Long[] getInsertions() { return insertions; } - public BitSet[] getDeletions() { + public Long[] getDeletions() { return deletions; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java index 50e9b0447..a072c4931 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java @@ -6,7 +6,6 @@ import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import java.util.BitSet; import java.util.EnumSet; /* @@ -61,7 +60,7 @@ public class CycleCovariate implements StandardCovariate { // Used to pick out the covariate's value from attributes of the read @Override public CovariateValues getValues(final GATKSAMRecord read) { - BitSet[] cycles = new BitSet[read.getReadLength()]; + Long[] cycles = new Long[read.getReadLength()]; final NGSPlatform ngsPlatform = read.getNGSPlatform(); // Discrete cycle platforms @@ -81,7 +80,7 @@ public class CycleCovariate implements StandardCovariate { final int CUSHION = 4; final int MAX_CYCLE = read.getReadLength() - CUSHION - 1; for (int i = 0; i < MAX_CYCLE; i++) { - cycles[i] = (iMAX_CYCLE) ? null : BitSetUtils.bitSetFrom(cycle); + cycles[i] = (iMAX_CYCLE) ? null : keyFromCycle(cycle); cycle += increment; } } @@ -108,19 +107,19 @@ public class CycleCovariate implements StandardCovariate { int iii = 0; while (iii < readLength) { while (iii < readLength && bases[iii] == (byte) 'T') { - cycles[iii] = BitSetUtils.bitSetFrom(cycle); + cycles[iii] = keyFromCycle(cycle); iii++; } while (iii < readLength && bases[iii] == (byte) 'A') { - cycles[iii] = BitSetUtils.bitSetFrom(cycle); + cycles[iii] = keyFromCycle(cycle); iii++; } while (iii < readLength && bases[iii] == (byte) 'C') { - cycles[iii] = BitSetUtils.bitSetFrom(cycle); + cycles[iii] = keyFromCycle(cycle); iii++; } while (iii < readLength && bases[iii] == (byte) 'G') { - cycles[iii] = BitSetUtils.bitSetFrom(cycle); + cycles[iii] = keyFromCycle(cycle); iii++; } if (iii < readLength) { @@ -130,7 +129,7 @@ public class CycleCovariate implements StandardCovariate { cycle++; } if (iii < readLength && !BaseUtils.isRegularBase(bases[iii])) { - cycles[iii] = BitSetUtils.bitSetFrom(cycle); + cycles[iii] = keyFromCycle(cycle); iii++; } @@ -140,19 +139,19 @@ public class CycleCovariate implements StandardCovariate { int iii = readLength - 1; while (iii >= 0) { while (iii >= 0 && bases[iii] == (byte) 'T') { - cycles[iii] = BitSetUtils.bitSetFrom(cycle); + cycles[iii] = keyFromCycle(cycle); iii--; } while (iii >= 0 && bases[iii] == (byte) 'A') { - cycles[iii] = BitSetUtils.bitSetFrom(cycle); + cycles[iii] = keyFromCycle(cycle); iii--; } while (iii >= 0 && bases[iii] == (byte) 'C') { - cycles[iii] = BitSetUtils.bitSetFrom(cycle); + cycles[iii] = keyFromCycle(cycle); iii--; } while (iii >= 0 && bases[iii] == (byte) 'G') { - cycles[iii] = BitSetUtils.bitSetFrom(cycle); + cycles[iii] = keyFromCycle(cycle); iii--; } if (iii >= 0) { @@ -162,7 +161,7 @@ public class CycleCovariate implements StandardCovariate { cycle++; } if (iii >= 0 && !BaseUtils.isRegularBase(bases[iii])) { - cycles[iii] = BitSetUtils.bitSetFrom(cycle); + cycles[iii] = keyFromCycle(cycle); iii--; } } @@ -184,17 +183,29 @@ public class CycleCovariate implements StandardCovariate { } @Override - public String keyFromBitSet(BitSet key) { - return String.format("%d", BitSetUtils.shortFrom(key)); + public String formatKey(final Long key) { + long cycle = key >> 1; // shift so we can remove the "sign" bit + if ( (key & 1) != 0 ) // is the last bit set? + cycle *= -1; // then the cycle is negative + return String.format("%d", cycle); } @Override - public BitSet bitSetFromKey(Object key) { - return (key instanceof String) ? BitSetUtils.bitSetFrom(Short.parseShort((String) key)) : BitSetUtils.bitSetFrom((Short) key); + public Long longFromKey(final Object key) { + return (key instanceof String) ? keyFromCycle(Short.parseShort((String) key)) : keyFromCycle((Short) key); } @Override public int numberOfBits() { - return BitSetUtils.numberOfBitsToRepresent(2 * Short.MAX_VALUE); // positive and negative + return BQSRKeyManager.numberOfBitsToRepresent(2 * Short.MAX_VALUE); // positive and negative + } + + private static Long keyFromCycle(final short cycle) { + // no negative values because + long result = Math.abs(cycle); + result = result << 1; // shift so we can add the "sign" bit + if ( cycle < 0 ) + result++; // negative cycles get the lower-most bit set + return result; } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java index 4100eb8bb..b3fcc024c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QualityScoreCovariate.java @@ -1,11 +1,8 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; -import org.broadinstitute.sting.utils.BitSetUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import java.util.BitSet; - /* * Copyright (c) 2009 The Broad Institute * @@ -50,18 +47,18 @@ public class QualityScoreCovariate implements RequiredCovariate { public CovariateValues getValues(final GATKSAMRecord read) { int readLength = read.getReadLength(); - BitSet[] mismatches = new BitSet[readLength]; - BitSet[] insertions = new BitSet[readLength]; - BitSet[] deletions = new BitSet[readLength]; + Long[] mismatches = new Long[readLength]; + Long[] insertions = new Long[readLength]; + Long[] deletions = new Long[readLength]; byte[] baseQualities = read.getBaseQualities(); byte[] baseInsertionQualities = read.getBaseInsertionQualities(); byte[] baseDeletionQualities = read.getBaseDeletionQualities(); for (int i = 0; i < baseQualities.length; i++) { - mismatches[i] = BitSetUtils.bitSetFrom(baseQualities[i]); - insertions[i] = BitSetUtils.bitSetFrom(baseInsertionQualities[i]); - deletions[i] = BitSetUtils.bitSetFrom(baseDeletionQualities[i]); + mismatches[i] = (long)baseQualities[i]; + insertions[i] = (long)baseInsertionQualities[i]; + deletions[i] = (long)baseDeletionQualities[i]; } return new CovariateValues(mismatches, insertions, deletions); @@ -74,17 +71,17 @@ public class QualityScoreCovariate implements RequiredCovariate { } @Override - public String keyFromBitSet(BitSet key) { - return String.format("%d", BitSetUtils.longFrom(key)); + public String formatKey(final Long key) { + return String.format("%d", key); } @Override - public BitSet bitSetFromKey(Object key) { - return (key instanceof String) ? BitSetUtils.bitSetFrom(Byte.parseByte((String) key)) : BitSetUtils.bitSetFrom((Byte) key); + public Long longFromKey(final Object key) { + return (key instanceof String) ? (long)Byte.parseByte((String) key) : (long)(Byte) key; } @Override public int numberOfBits() { - return BitSetUtils.numberOfBitsToRepresent(QualityUtils.MAX_QUAL_SCORE); + return BQSRKeyManager.numberOfBitsToRepresent(QualityUtils.MAX_QUAL_SCORE); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java index c631a63ce..a6d222ba0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/QuantizationInfo.java @@ -6,7 +6,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.recalibration.QualQuantizer; import java.util.Arrays; -import java.util.BitSet; import java.util.List; import java.util.Map; @@ -31,13 +30,13 @@ public class QuantizationInfo { this(quantizedQuals, empiricalQualCounts, calculateQuantizationLevels(quantizedQuals)); } - public QuantizationInfo(Map> keysAndTablesMap, int quantizationLevels) { + public QuantizationInfo(Map> keysAndTablesMap, int quantizationLevels) { final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution for (int i = 0; i < qualHistogram.length; i++) qualHistogram[i] = 0L; - Map qualTable = null; // look for the quality score table - for (Map.Entry> entry : keysAndTablesMap.entrySet()) { + Map qualTable = null; // look for the quality score table + for (Map.Entry> entry : keysAndTablesMap.entrySet()) { BQSRKeyManager keyManager = entry.getKey(); if (keyManager.getRequiredCovariates().size() == 2) // it should be the only one with 2 required covaraites qualTable = entry.getValue(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java index 74b759da5..b307aeb17 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariates.java @@ -2,8 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import java.util.BitSet; - /** * The object temporarily held by a read that describes all of it's covariates. * @@ -13,16 +11,16 @@ import java.util.BitSet; * @since 2/8/12 */ public class ReadCovariates { - private final BitSet[][] mismatchesKeySet; - private final BitSet[][] insertionsKeySet; - private final BitSet[][] deletionsKeySet; + private final Long[][] mismatchesKeySet; + private final Long[][] insertionsKeySet; + private final Long[][] deletionsKeySet; private int nextCovariateIndex; public ReadCovariates(int readLength, int numberOfCovariates) { - this.mismatchesKeySet = new BitSet[readLength][numberOfCovariates]; - this.insertionsKeySet = new BitSet[readLength][numberOfCovariates]; - this.deletionsKeySet = new BitSet[readLength][numberOfCovariates]; + this.mismatchesKeySet = new Long[readLength][numberOfCovariates]; + this.insertionsKeySet = new Long[readLength][numberOfCovariates]; + this.deletionsKeySet = new Long[readLength][numberOfCovariates]; this.nextCovariateIndex = 0; } @@ -33,7 +31,7 @@ public class ReadCovariates { nextCovariateIndex++; } - public BitSet[] getKeySet(final int readPosition, final EventType errorModel) { + public Long[] getKeySet(final int readPosition, final EventType errorModel) { switch (errorModel) { case BASE_SUBSTITUTION: return getMismatchesKeySet(readPosition); @@ -46,19 +44,19 @@ public class ReadCovariates { } } - public BitSet[] getMismatchesKeySet(int readPosition) { + public Long[] getMismatchesKeySet(int readPosition) { return mismatchesKeySet[readPosition]; } - public BitSet[] getInsertionsKeySet(int readPosition) { + public Long[] getInsertionsKeySet(int readPosition) { return insertionsKeySet[readPosition]; } - public BitSet[] getDeletionsKeySet(int readPosition) { + public Long[] getDeletionsKeySet(int readPosition) { return deletionsKeySet[readPosition]; } - private void transposeCovariateValues(BitSet[][] keySet, BitSet[] covariateValues) { + private void transposeCovariateValues(Long[][] keySet, Long[] covariateValues) { for (int i = 0; i < covariateValues.length; i++) keySet[i][nextCovariateIndex] = covariateValues[i]; } @@ -66,15 +64,15 @@ public class ReadCovariates { /** * Testing routines */ - protected BitSet[][] getMismatchesKeySet() { + protected Long[][] getMismatchesKeySet() { return mismatchesKeySet; } - protected BitSet[][] getInsertionsKeySet() { + protected Long[][] getInsertionsKeySet() { return insertionsKeySet; } - protected BitSet[][] getDeletionsKeySet() { + protected Long[][] getDeletionsKeySet() { return deletionsKeySet; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java index 579643f56..0bba319c2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadGroupCovariate.java @@ -1,11 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; -import org.broadinstitute.sting.utils.BitSetUtils; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.Arrays; -import java.util.BitSet; import java.util.HashMap; /* @@ -43,22 +41,21 @@ import java.util.HashMap; public class ReadGroupCovariate implements RequiredCovariate { - private final HashMap readGroupLookupTable = new HashMap(); - private final HashMap readGroupReverseLookupTable = new HashMap(); - private short nextId = 0; + private final HashMap readGroupLookupTable = new HashMap(); + private final HashMap readGroupReverseLookupTable = new HashMap(); + private long nextId = 0L; // Initialize any member variables using the command-line arguments passed to the walkers @Override - public void initialize(final RecalibrationArgumentCollection RAC) { - } + public void initialize(final RecalibrationArgumentCollection RAC) {} @Override public CovariateValues getValues(final GATKSAMRecord read) { final int l = read.getReadLength(); final String readGroupId = readGroupValueFromRG(read.getReadGroup()); - BitSet rg = bitSetForReadGroup(readGroupId); // All objects must output a BitSet, so we convert the "compressed" representation of the Read Group into a bitset - BitSet[] readGroups = new BitSet[l]; - Arrays.fill(readGroups, rg); + final long key = keyForReadGroup(readGroupId); + Long[] readGroups = new Long[l]; + Arrays.fill(readGroups, key); return new CovariateValues(readGroups, readGroups, readGroups); } @@ -68,35 +65,28 @@ public class ReadGroupCovariate implements RequiredCovariate { } @Override - public String keyFromBitSet(BitSet key) { - return decodeReadGroup((short) BitSetUtils.longFrom(key)); + public String formatKey(final Long key) { + return readGroupReverseLookupTable.get(key); } @Override - public BitSet bitSetFromKey(Object key) { - return bitSetForReadGroup((String) key); + public Long longFromKey(Object key) { + return keyForReadGroup((String) key); } @Override public int numberOfBits() { - return BitSetUtils.numberOfBitsToRepresent(Short.MAX_VALUE); + return BQSRKeyManager.numberOfBitsToRepresent(Short.MAX_VALUE); } - private String decodeReadGroup(final short id) { - return readGroupReverseLookupTable.get(id); - } - - private BitSet bitSetForReadGroup(String readGroupId) { - short shortId; - if (readGroupLookupTable.containsKey(readGroupId)) - shortId = readGroupLookupTable.get(readGroupId); - else { - shortId = nextId; + private Long keyForReadGroup(final String readGroupId) { + if (!readGroupLookupTable.containsKey(readGroupId)) { readGroupLookupTable.put(readGroupId, nextId); readGroupReverseLookupTable.put(nextId, readGroupId); nextId++; - } - return BitSetUtils.bitSetFrom(shortId); + } + + return readGroupLookupTable.get(readGroupId); } /** @@ -105,8 +95,8 @@ public class ReadGroupCovariate implements RequiredCovariate { * @param rg the read group record * @return platform unit or readgroup id */ - private String readGroupValueFromRG(GATKSAMReadGroupRecord rg) { - String platformUnit = rg.getPlatformUnit(); + private String readGroupValueFromRG(final GATKSAMReadGroupRecord rg) { + final String platformUnit = rg.getPlatformUnit(); return platformUnit == null ? rg.getId() : platformUnit; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java index f4c7873f4..62b9ed13d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java @@ -149,17 +149,17 @@ public class RecalDataManager { * @param optionalCovariates list of optional covariates (in order) * @return a map with each key manager and it's corresponding recalibration table properly initialized */ - public static LinkedHashMap> initializeTables(ArrayList requiredCovariates, ArrayList optionalCovariates) { - final LinkedHashMap> tablesAndKeysMap = new LinkedHashMap>(); - ArrayList requiredCovariatesToAdd = new ArrayList(requiredCovariates.size() + 1); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates. - ArrayList optionalCovariatesToAdd = new ArrayList(); // initialize an empty array of optional covariates to create the first few tables + public static LinkedHashMap> initializeTables(ArrayList requiredCovariates, ArrayList optionalCovariates) { + final LinkedHashMap> tablesAndKeysMap = new LinkedHashMap>(); + final ArrayList requiredCovariatesToAdd = new ArrayList(requiredCovariates.size() + 1); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates. + final ArrayList optionalCovariatesToAdd = new ArrayList(); // initialize an empty array of optional covariates to create the first few tables for (Covariate covariate : requiredCovariates) { requiredCovariatesToAdd.add(covariate); - final Map recalTable = new HashMap(); // initializing a new recal table for each required covariate (cumulatively) + final Map recalTable = new HashMap(); // initializing a new recal table for each required covariate (cumulatively) final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map } - final Map recalTable = new HashMap(Short.MAX_VALUE); // initializing a new recal table to hold all optional covariates + final Map recalTable = new HashMap(Short.MAX_VALUE); // initializing a new recal table to hold all optional covariates final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map return tablesAndKeysMap; @@ -181,7 +181,7 @@ public class RecalDataManager { final List> requiredClasses = new PluginManager(RequiredCovariate.class).getPlugins(); final List> standardClasses = new PluginManager(StandardCovariate.class).getPlugins(); - ArrayList requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates + final ArrayList requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates ArrayList optionalCovariates = new ArrayList(); if (argumentCollection.USE_STANDARD_COVARIATES) optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user @@ -223,7 +223,7 @@ public class RecalDataManager { logger.info(""); } - private static List generateReportTables(Map> keysAndTablesMap) { + private static List generateReportTables(Map> keysAndTablesMap) { List result = new LinkedList(); int tableIndex = 0; @@ -235,19 +235,19 @@ public class RecalDataManager { final Pair nObservations = new Pair(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d"); final Pair nErrors = new Pair(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME, "%d"); - for (Map.Entry> entry : keysAndTablesMap.entrySet()) { - BQSRKeyManager keyManager = entry.getKey(); - Map recalTable = entry.getValue(); + for (Map.Entry> entry : keysAndTablesMap.entrySet()) { + final BQSRKeyManager keyManager = entry.getKey(); + final Map recalTable = entry.getValue(); - boolean isReadGroupTable = tableIndex == 0; // special case for the read group table so we can print the extra column it needs. + final boolean isReadGroupTable = tableIndex == 0; // special case for the read group table so we can print the extra column it needs. - List requiredList = keyManager.getRequiredCovariates(); // ask the key manager what required covariates were used in this recal table - List optionalList = keyManager.getOptionalCovariates(); // ask the key manager what optional covariates were used in this recal table + final List requiredList = keyManager.getRequiredCovariates(); // ask the key manager what required covariates were used in this recal table + final List optionalList = keyManager.getOptionalCovariates(); // ask the key manager what optional covariates were used in this recal table - ArrayList> columnNames = new ArrayList>(); // initialize the array to hold the column names + final ArrayList> columnNames = new ArrayList>(); // initialize the array to hold the column names - for (Covariate covariate : requiredList) { - String name = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the covariate names and put them in order + for (final Covariate covariate : requiredList) { + final String name = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the covariate names and put them in order columnNames.add(new Pair(name, "%s")); // save the required covariate name so we can reference it in the future } @@ -263,30 +263,30 @@ public class RecalDataManager { columnNames.add(nObservations); columnNames.add(nErrors); - GATKReportTable reportTable = new GATKReportTable("RecalTable" + tableIndex++, "", columnNames.size()); - for (Pair columnName : columnNames) + final GATKReportTable reportTable = new GATKReportTable("RecalTable" + tableIndex++, "", columnNames.size()); + for (final Pair columnName : columnNames) reportTable.addColumn(columnName.getFirst(), columnName.getSecond()); // every table must have the event type int rowIndex = 0; - for (Map.Entry recalTableEntry : recalTable.entrySet()) { // create a map with column name => key value for all covariate keys - BitSet bitSetKey = recalTableEntry.getKey(); - Map columnData = new HashMap(columnNames.size()); - Iterator> iterator = columnNames.iterator(); - for (Object key : keyManager.keySetFrom(bitSetKey)) { - String columnName = iterator.next().getFirst(); + for (Map.Entry recalTableEntry : recalTable.entrySet()) { // create a map with column name => key value for all covariate keys + final Long bitSetKey = recalTableEntry.getKey(); + final Map columnData = new HashMap(columnNames.size()); + final Iterator> iterator = columnNames.iterator(); + for (final Object key : keyManager.keySetFrom(bitSetKey)) { + final String columnName = iterator.next().getFirst(); columnData.put(columnName, key); } - RecalDatum datum = recalTableEntry.getValue(); + final RecalDatum datum = recalTableEntry.getValue(); columnData.put(iterator.next().getFirst(), datum.getEmpiricalQuality()); if (isReadGroupTable) columnData.put(iterator.next().getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table columnData.put(iterator.next().getFirst(), datum.numObservations); columnData.put(iterator.next().getFirst(), datum.numMismatches); - for (Map.Entry dataEntry : columnData.entrySet()) { - String columnName = dataEntry.getKey(); - Object value = dataEntry.getValue(); + for (final Map.Entry dataEntry : columnData.entrySet()) { + final String columnName = dataEntry.getKey(); + final Object value = dataEntry.getValue(); reportTable.set(rowIndex, columnName, value.toString()); } rowIndex++; @@ -296,16 +296,16 @@ public class RecalDataManager { return result; } - public static void outputRecalibrationReport(RecalibrationArgumentCollection RAC, QuantizationInfo quantizationInfo, Map> keysAndTablesMap, PrintStream outputFile) { + public static void outputRecalibrationReport(RecalibrationArgumentCollection RAC, QuantizationInfo quantizationInfo, Map> keysAndTablesMap, PrintStream outputFile) { outputRecalibrationReport(RAC.generateReportTable(), quantizationInfo.generateReportTable(), generateReportTables(keysAndTablesMap), outputFile); } - public static void outputRecalibrationReport(GATKReportTable argumentTable, QuantizationInfo quantizationInfo, LinkedHashMap> keysAndTablesMap, PrintStream outputFile) { + public static void outputRecalibrationReport(GATKReportTable argumentTable, QuantizationInfo quantizationInfo, LinkedHashMap> keysAndTablesMap, PrintStream outputFile) { outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(keysAndTablesMap), outputFile); } private static void outputRecalibrationReport(GATKReportTable argumentTable, GATKReportTable quantizationTable, List recalTables, PrintStream outputFile) { - GATKReport report = new GATKReport(); + final GATKReport report = new GATKReport(); report.addTable(argumentTable); report.addTable(quantizationTable); report.addTables(recalTables); @@ -328,7 +328,7 @@ public class RecalDataManager { final File plotFileName = new File(csvFileName + ".pdf"); files.getFirst().close(); - RScriptExecutor executor = new RScriptExecutor(); + final RScriptExecutor executor = new RScriptExecutor(); executor.addScript(new Resource(SCRIPT_FILE, RecalDataManager.class)); executor.addArgs(csvFileName.getAbsolutePath()); executor.addArgs(plotFileName.getAbsolutePath()); @@ -340,32 +340,32 @@ public class RecalDataManager { } - public static void generateRecalibrationPlot(File filename, LinkedHashMap> original, boolean keepIntermediates) { - Pair files = initializeRecalibrationPlot(filename); + public static void generateRecalibrationPlot(File filename, LinkedHashMap> original, boolean keepIntermediates) { + final Pair files = initializeRecalibrationPlot(filename); writeCSV(files.getFirst(), original, "ORIGINAL", true); outputRecalibrationPlot(files, keepIntermediates); } - public static void generateRecalibrationPlot(File filename, LinkedHashMap> original, LinkedHashMap> recalibrated, boolean keepIntermediates) { - Pair files = initializeRecalibrationPlot(filename); + public static void generateRecalibrationPlot(File filename, LinkedHashMap> original, LinkedHashMap> recalibrated, boolean keepIntermediates) { + final Pair files = initializeRecalibrationPlot(filename); writeCSV(files.getFirst(), recalibrated, "RECALIBRATED", true); writeCSV(files.getFirst(), original, "ORIGINAL", false); outputRecalibrationPlot(files, keepIntermediates); } - private static void writeCSV(PrintStream deltaTableFile, LinkedHashMap> map, String recalibrationMode, boolean printHeader) { + private static void writeCSV(PrintStream deltaTableFile, LinkedHashMap> map, String recalibrationMode, boolean printHeader) { final int QUALITY_SCORE_COVARIATE_INDEX = 1; - final Map deltaTable = new HashMap(); + final Map deltaTable = new HashMap(); BQSRKeyManager deltaKeyManager = null; - for (Map.Entry> tableEntry : map.entrySet()) { - BQSRKeyManager keyManager = tableEntry.getKey(); + for (Map.Entry> tableEntry : map.entrySet()) { + final BQSRKeyManager keyManager = tableEntry.getKey(); if (keyManager.getOptionalCovariates().size() > 0) { // initialize with the 'all covariates' table // create a key manager for the delta table final List requiredCovariates = Arrays.asList(keyManager.getRequiredCovariates().get(0)); // include the read group covariate as the only required covariate - List optionalCovariates = new ArrayList(); + final List optionalCovariates = new ArrayList(); optionalCovariates.add(keyManager.getRequiredCovariates().get(1)); // include the quality score covariate as an optional covariate optionalCovariates.addAll(keyManager.getOptionalCovariates()); // include all optional covariates deltaKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initialize the key manager @@ -376,37 +376,37 @@ public class RecalDataManager { throw new ReviewedStingException ("Couldn't find the covariates table"); boolean readyToPrint = false; - for (Map.Entry> tableEntry : map.entrySet()) { - BQSRKeyManager keyManager = tableEntry.getKey(); + for (Map.Entry> tableEntry : map.entrySet()) { + final BQSRKeyManager keyManager = tableEntry.getKey(); if (keyManager.getRequiredCovariates().size() == 2 && keyManager.getOptionalCovariates().isEmpty()) { // look for the QualityScore table - Map table = tableEntry.getValue(); + final Map table = tableEntry.getValue(); // add the quality score table to the delta table - for (Map.Entry entry : table.entrySet()) { // go through every element in the covariates table to create the delta table - RecalDatum recalDatum = entry.getValue(); // the current element (recal datum) + for (final Map.Entry entry : table.entrySet()) { // go through every element in the covariates table to create the delta table + final RecalDatum recalDatum = entry.getValue(); // the current element (recal datum) - List covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key - List newCovs = new ArrayList(4); + final List covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key + final List newCovs = new ArrayList(4); newCovs.add(0, covs.get(0)); // replace the covariate value with the quality score newCovs.add(1, covs.get(1)); newCovs.add(2, "QualityScore"); // replace the covariate name with QualityScore (for the QualityScore covariate) newCovs.add(3, covs.get(2)); - BitSet deltaKey = deltaKeyManager.bitSetFromKey(newCovs.toArray()); // create a new bitset key for the delta table + final Long deltaKey = deltaKeyManager.longFromKey(newCovs.toArray()); // create a new bitset key for the delta table addToDeltaTable(deltaTable, deltaKey, recalDatum); // add this covariate to the delta table } } else if (keyManager.getOptionalCovariates().size() > 0) { // look for the optional covariates table - Map table = tableEntry.getValue(); + final Map table = tableEntry.getValue(); // add the optional covariates to the delta table - for (Map.Entry entry : table.entrySet()) { // go through every element in the covariates table to create the delta table - RecalDatum recalDatum = entry.getValue(); // the current element (recal datum) + for (final Map.Entry entry : table.entrySet()) { // go through every element in the covariates table to create the delta table + final RecalDatum recalDatum = entry.getValue(); // the current element (recal datum) - List covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key + final List covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key covs.remove(QUALITY_SCORE_COVARIATE_INDEX); // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS) - BitSet deltaKey = deltaKeyManager.bitSetFromKey(covs.toArray()); // create a new bitset key for the delta table + final Long deltaKey = deltaKeyManager.longFromKey(covs.toArray()); // create a new bitset key for the delta table addToDeltaTable(deltaTable, deltaKey, recalDatum); // add this covariate to the delta table } readyToPrint = true; @@ -416,7 +416,7 @@ public class RecalDataManager { if (readyToPrint) { if (printHeader) { - List header = new LinkedList(); + final List header = new LinkedList(); header.add("ReadGroup"); header.add("CovariateValue"); header.add("CovariateName"); @@ -431,9 +431,9 @@ public class RecalDataManager { } // print each data line - for(Map.Entry deltaEntry : deltaTable.entrySet()) { - List deltaKeys = deltaKeyManager.keySetFrom(deltaEntry.getKey()); - RecalDatum deltaDatum = deltaEntry.getValue(); + for (final Map.Entry deltaEntry : deltaTable.entrySet()) { + final List deltaKeys = deltaKeyManager.keySetFrom(deltaEntry.getKey()); + final RecalDatum deltaDatum = deltaEntry.getValue(); deltaTableFile.print(Utils.join(",", deltaKeys)); deltaTableFile.print("," + deltaDatum.stringForCSV()); deltaTableFile.println("," + recalibrationMode); @@ -453,8 +453,8 @@ public class RecalDataManager { * @param deltaKey the key to the table * @param recalDatum the recal datum to combine with the accuracyDatum element in the table */ - private static void addToDeltaTable(Map deltaTable, BitSet deltaKey, RecalDatum recalDatum) { - RecalDatum deltaDatum = deltaTable.get(deltaKey); // check if we already have a RecalDatum for this key + private static void addToDeltaTable(Map deltaTable, Long deltaKey, RecalDatum recalDatum) { + final RecalDatum deltaDatum = deltaTable.get(deltaKey); // check if we already have a RecalDatum for this key if (deltaDatum == null) deltaTable.put(deltaKey, new RecalDatum(recalDatum)); // if we don't have a key yet, create a new one with the same values as the curent datum else diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java index 43eb2ba34..9ac1d0562 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java @@ -18,8 +18,8 @@ import java.util.*; */ public class RecalibrationReport { private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done) - private final LinkedHashMap> keysAndTablesMap; // quick access reference to the read group table and its key manager - private final ArrayList requestedCovariates = new ArrayList(); // list of all covariates to be used in this calculation + private final LinkedHashMap> keysAndTablesMap; // quick access reference to the read group table and its key manager + private final ArrayList requestedCovariates = new ArrayList(); // list of all covariates to be used in this calculation private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter @@ -42,15 +42,15 @@ public class RecalibrationReport { for (Covariate cov : requestedCovariates) cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection - keysAndTablesMap = new LinkedHashMap>(); - ArrayList requiredCovariatesToAdd = new ArrayList(requiredCovariates.size()); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates. - ArrayList optionalCovariatesToAdd = new ArrayList(); // initialize an empty array of optional covariates to create the first few tables + keysAndTablesMap = new LinkedHashMap>(); + ArrayList requiredCovariatesToAdd = new ArrayList(requiredCovariates.size()); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates. + ArrayList optionalCovariatesToAdd = new ArrayList(); // initialize an empty array of optional covariates to create the first few tables for (Covariate covariate : requiredCovariates) { requiredCovariatesToAdd.add(covariate); - final Map table; // initializing a new recal table for each required covariate (cumulatively) + final Map table; // initializing a new recal table for each required covariate (cumulatively) final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager - int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES) + final int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES) final String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check."; if (nRequiredCovariates == 1) { // if there is only one required covariate, this is the read group table final GATKReportTable reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE); @@ -69,11 +69,11 @@ public class RecalibrationReport { final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager final GATKReportTable reportTable = report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE); - final Map table = parseAllCovariatesTable(keyManager, reportTable); + final Map table = parseAllCovariatesTable(keyManager, reportTable); keysAndTablesMap.put(keyManager, table); } - protected RecalibrationReport(QuantizationInfo quantizationInfo, LinkedHashMap> keysAndTablesMap, GATKReportTable argumentTable, RecalibrationArgumentCollection RAC) { + protected RecalibrationReport(QuantizationInfo quantizationInfo, LinkedHashMap> keysAndTablesMap, GATKReportTable argumentTable, RecalibrationArgumentCollection RAC) { this.quantizationInfo = quantizationInfo; this.keysAndTablesMap = keysAndTablesMap; this.argumentTable = argumentTable; @@ -94,25 +94,25 @@ public class RecalibrationReport { * @param other the recalibration report to combine with this one */ public void combine(RecalibrationReport other) { - Iterator>> thisIterator = keysAndTablesMap.entrySet().iterator(); + Iterator>> thisIterator = keysAndTablesMap.entrySet().iterator(); - for (Map.Entry> otherEntry : other.getKeysAndTablesMap().entrySet()) { - Map.Entry> thisEntry = thisIterator.next(); + for (Map.Entry> otherEntry : other.getKeysAndTablesMap().entrySet()) { + Map.Entry> thisEntry = thisIterator.next(); - Map thisTable = thisEntry.getValue(); - BQSRKeyManager thisKeyManager = thisEntry.getKey(); - BQSRKeyManager otherKeyManager = otherEntry.getKey(); + final Map thisTable = thisEntry.getValue(); + final BQSRKeyManager thisKeyManager = thisEntry.getKey(); + final BQSRKeyManager otherKeyManager = otherEntry.getKey(); - for (Map.Entry otherTableEntry : otherEntry.getValue().entrySet()) { - RecalDatum otherDatum = otherTableEntry.getValue(); - BitSet otherBitKey = otherTableEntry.getKey(); - List otherObjectKey = otherKeyManager.keySetFrom(otherBitKey); + for (Map.Entry otherTableEntry : otherEntry.getValue().entrySet()) { + final RecalDatum otherDatum = otherTableEntry.getValue(); + final Long otherBitKey = otherTableEntry.getKey(); + final List otherObjectKey = otherKeyManager.keySetFrom(otherBitKey); - BitSet thisBitKey = thisKeyManager.bitSetFromKey(otherObjectKey.toArray()); - RecalDatum thisDatum = thisTable.get(thisBitKey); + final Long thisKey = thisKeyManager.longFromKey(otherObjectKey.toArray()); + final RecalDatum thisDatum = thisTable.get(thisKey); if (thisDatum == null) - thisTable.put(thisBitKey, otherDatum); + thisTable.put(thisKey, otherDatum); else thisDatum.combine(otherDatum); } @@ -123,7 +123,7 @@ public class RecalibrationReport { return quantizationInfo; } - public LinkedHashMap> getKeysAndTablesMap() { + public LinkedHashMap> getKeysAndTablesMap() { return keysAndTablesMap; } @@ -138,7 +138,7 @@ public class RecalibrationReport { * @param reportTable the GATKReport table containing data for this table * @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key. */ - private Map parseAllCovariatesTable(BQSRKeyManager keyManager, GATKReportTable reportTable) { + private Map parseAllCovariatesTable(BQSRKeyManager keyManager, GATKReportTable reportTable) { ArrayList columnNamesOrderedList = new ArrayList(5); columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME); columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME); @@ -155,7 +155,7 @@ public class RecalibrationReport { * @param reportTable the GATKReport table containing data for this table * @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key. */ - private Map parseQualityScoreTable(BQSRKeyManager keyManager, GATKReportTable reportTable) { + private Map parseQualityScoreTable(BQSRKeyManager keyManager, GATKReportTable reportTable) { ArrayList columnNamesOrderedList = new ArrayList(3); columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME); columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME); @@ -170,7 +170,7 @@ public class RecalibrationReport { * @param reportTable the GATKReport table containing data for this table * @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key. */ - private Map parseReadGroupTable(BQSRKeyManager keyManager, GATKReportTable reportTable) { + private Map parseReadGroupTable(BQSRKeyManager keyManager, GATKReportTable reportTable) { ArrayList columnNamesOrderedList = new ArrayList(2); columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME); columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME); @@ -185,26 +185,26 @@ public class RecalibrationReport { * @param columnNamesOrderedList a list of columns to read from the report table and build as key for this particular table * @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key. */ - private Map genericRecalTableParsing(BQSRKeyManager keyManager, GATKReportTable reportTable, ArrayList columnNamesOrderedList, boolean hasEstimatedQReportedColumn) { - Map result = new HashMap(reportTable.getNumRows()*2); + private Map genericRecalTableParsing(BQSRKeyManager keyManager, GATKReportTable reportTable, ArrayList columnNamesOrderedList, boolean hasEstimatedQReportedColumn) { + final Map result = new HashMap(reportTable.getNumRows()*2); for ( int i = 0; i < reportTable.getNumRows(); i++ ) { - int nKeys = columnNamesOrderedList.size(); - Object [] keySet = new Object[nKeys]; + final int nKeys = columnNamesOrderedList.size(); + final Object [] keySet = new Object[nKeys]; for (int j = 0; j < nKeys; j++) - keySet[j] = reportTable.get(i, columnNamesOrderedList.get(j)); // all these objects are okay in String format, the key manager will handle them correctly (except for the event type (see below) + keySet[j] = reportTable.get(i, columnNamesOrderedList.get(j)); // all these objects are okay in String format, the key manager will handle them correctly (except for the event type (see below) keySet[keySet.length-1] = EventType.eventFrom((String) keySet[keySet.length-1]); // the last key is always the event type. We convert the string ("M", "I" or "D") to an enum object (necessary for the key manager). - BitSet bitKey = keyManager.bitSetFromKey(keySet); + final Long bitKey = keyManager.longFromKey(keySet); - long nObservations = (Long) reportTable.get(i, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME); - long nErrors = (Long) reportTable.get(i, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME); - double empiricalQuality = (Double) reportTable.get(i, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME); + final long nObservations = (Long) reportTable.get(i, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME); + final long nErrors = (Long) reportTable.get(i, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME); + final double empiricalQuality = (Double) reportTable.get(i, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME); - double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table - (Double) reportTable.get(i, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table - Byte.parseByte((String) reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table + final double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table + (Double) reportTable.get(i, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table + Byte.parseByte((String) reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table - RecalDatum recalDatum = new RecalDatum(nObservations, nErrors, estimatedQReported, empiricalQuality); + final RecalDatum recalDatum = new RecalDatum(nObservations, nErrors, estimatedQReported, empiricalQuality); result.put(bitKey, recalDatum); } return result; @@ -217,14 +217,14 @@ public class RecalibrationReport { * @return an ArrayList with the quantization mappings from 0 to MAX_QUAL_SCORE */ private QuantizationInfo initializeQuantizationTable(GATKReportTable table) { - Byte[] quals = new Byte[QualityUtils.MAX_QUAL_SCORE + 1]; - Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1]; + final Byte[] quals = new Byte[QualityUtils.MAX_QUAL_SCORE + 1]; + final Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1]; for ( int i = 0; i < table.getNumRows(); i++ ) { - byte originalQual = (byte)i; - Object quantizedObject = table.get(i, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME); - Object countObject = table.get(i, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME); - byte quantizedQual = Byte.parseByte(quantizedObject.toString()); - long quantizedCount = Long.parseLong(countObject.toString()); + final byte originalQual = (byte)i; + final Object quantizedObject = table.get(i, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME); + final Object countObject = table.get(i, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME); + final byte quantizedQual = Byte.parseByte(quantizedObject.toString()); + final long quantizedCount = Long.parseLong(countObject.toString()); quals[originalQual] = quantizedQual; counts[originalQual] = quantizedCount; } @@ -238,7 +238,7 @@ public class RecalibrationReport { * @return a RAC object properly initialized with all the objects in the table */ private RecalibrationArgumentCollection initializeArgumentCollectionTable(GATKReportTable table) { - RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); + final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); for ( int i = 0; i < table.getNumRows(); i++ ) { final String argument = table.get(i, "Argument").toString(); @@ -306,7 +306,7 @@ public class RecalibrationReport { * and quantization of the quality scores during every call of combine(). Very useful for the BQSRGatherer. */ public void calculateEmpiricalAndQuantizedQualities() { - for (Map table : keysAndTablesMap.values()) + for (Map table : keysAndTablesMap.values()) for (RecalDatum datum : table.values()) datum.calcCombinedEmpiricalQuality(); @@ -331,26 +331,26 @@ public class RecalibrationReport { return isEqualTable(this.keysAndTablesMap, other.keysAndTablesMap); } - private boolean isEqualTable(LinkedHashMap> t1, LinkedHashMap> t2) { + private boolean isEqualTable(LinkedHashMap> t1, LinkedHashMap> t2) { if (t1.size() != t2.size()) return false; - Iterator>> t1Iterator = t1.entrySet().iterator(); - Iterator>> t2Iterator = t2.entrySet().iterator(); + final Iterator>> t1Iterator = t1.entrySet().iterator(); + final Iterator>> t2Iterator = t2.entrySet().iterator(); while (t1Iterator.hasNext() && t2Iterator.hasNext()) { - Map.Entry> t1MapEntry = t1Iterator.next(); - Map.Entry> t2MapEntry = t2Iterator.next(); + Map.Entry> t1MapEntry = t1Iterator.next(); + Map.Entry> t2MapEntry = t2Iterator.next(); if (!(t1MapEntry.getKey().equals(t2MapEntry.getKey()))) return false; - Map table2 = t2MapEntry.getValue(); - for (Map.Entry t1TableEntry : t1MapEntry.getValue().entrySet()) { - BitSet t1Key = t1TableEntry.getKey(); + final Map table2 = t2MapEntry.getValue(); + for (Map.Entry t1TableEntry : t1MapEntry.getValue().entrySet()) { + final Long t1Key = t1TableEntry.getKey(); if (!table2.containsKey(t1Key)) return false; - RecalDatum t1Datum = t1TableEntry.getValue(); + final RecalDatum t1Datum = t1TableEntry.getValue(); if (!t1Datum.equals(table2.get(t1Key))) return false; } diff --git a/public/java/src/org/broadinstitute/sting/utils/BitSetUtils.java b/public/java/src/org/broadinstitute/sting/utils/BitSetUtils.java index aa7cd4b37..0f61b20d7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/BitSetUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/BitSetUtils.java @@ -1,9 +1,5 @@ package org.broadinstitute.sting.utils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -import java.io.ByteArrayOutputStream; -import java.io.ObjectOutputStream; import java.util.BitSet; import java.util.HashMap; import java.util.Map; @@ -16,10 +12,8 @@ import java.util.Map; */ public class BitSetUtils { - static final private int MAX_DNA_CONTEXT = 31; // the maximum context size (number of bases) permitted in the "long bitset" implementation of the DNA <=> BitSet conversion. static final private byte NBITS_LONG_REPRESENTATION = 64; // the number of bits used in the long version to represent the bit set (necessary for the two's complement representation of negative numbers) static final private byte NBITS_SHORT_REPRESENTATION = 16; // the number of bits used in the short version to represent the bit set (necessary for the two's complement representation of negative numbers) - static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1]; // keeps the memoized table with the number of combinations for each given DNA context length /** * Creates an long out of a bitset @@ -112,173 +106,4 @@ public class BitSetUtils { } return bitSet; } - - /** - * Converts a BitSet into the dna string representation. - * - * Warning: This conversion is limited to long precision, therefore the dna sequence cannot - * be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create - * a bitSetFrom(BigNumber) method. - * - * We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the - * base_10 representation of the sequence. This is important for us to know how to bring the number - * to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented - * as 0's and leading 0's are omitted). - * - * quasi-canonical because A is represented by a 0, therefore, - * instead of : 0, 1, 2, 3, 10, 11, 12, ... - * we have : 0, 1, 2, 3, 00, 01, 02, ... - * - * but we can correctly decode it because we know the final length. - * - * @param bitSet the bitset representation of the dna sequence - * @return the dna sequence represented by the bitset - */ - public static String dnaFrom(final BitSet bitSet) { - long number = longFrom(bitSet); // the base_10 representation of the bit set - if (number < 0) - throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?"); - - final int length = contextLengthFor(number); // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls) - number -= combinationsFor(length - 1); // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation - - StringBuilder dna = new StringBuilder(); - while (number > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical) - byte base = (byte) (number % 4); - switch (base) { - case 0: - dna.append('A'); - break; - case 1: - dna.append('C'); - break; - case 2: - dna.append('G'); - break; - case 3: - dna.append('T'); - break; - } - number /= 4; - } - for (int j = dna.length(); j < length; j++) - dna.append('A'); // add leading A's as necessary (due to the "quasi" canonical status, see description above) - - return dna.reverse().toString(); // make sure to reverse the string since we should have been pre-pending all along - } - - /** - * Creates a BitSet representation of a given dna string. - * - * Warning: This conversion is limited to long precision, therefore the dna sequence cannot - * be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create - * a bitSetFrom(BigNumber) method. - * - * The bit representation of a dna string is the simple: - * 0 A 4 AA 8 CA - * 1 C 5 AC ... - * 2 G 6 AG 1343 TTGGT - * 3 T 7 AT 1364 TTTTT - * - * To convert from dna to number, we convert the dna string to base10 and add all combinations that - * preceded the string (with smaller lengths). - * - * @param dna the dna sequence - * @return the bitset representing the dna sequence - */ - public static BitSet bitSetFrom(String dna) { - return bitSetFrom(dna.getBytes()); - } - - public static BitSet bitSetFrom(final byte[] dna) { - if (dna.length > MAX_DNA_CONTEXT) - throw new ReviewedStingException(String.format("DNA Length cannot be bigger than %d. dna: %s (%d)", MAX_DNA_CONTEXT, dna, dna.length)); - - final long preContext = combinationsFor(dna.length - 1); // the sum of all combinations that preceded the length of the dna string - long baseTen = 0; // the number in base_10 that we are going to use to generate the bit set - for (final byte base : dna) { - baseTen *= 4; - baseTen += BaseUtils.simpleBaseToBaseIndex(base); - } - return bitSetFrom(baseTen + preContext); // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length. - } - - /** - * Calculates the number of bits necessary to represent a given number of elements - * - * @param numberOfElements the number of elements to represent (must be positive) - * @return the number of bits necessary to represent this many elements - */ - public static int numberOfBitsToRepresent(long numberOfElements) { - if (numberOfElements < 0) - throw new ReviewedStingException("Number of elements must be positive: " + numberOfElements); - - if (numberOfElements == 1L) - return 1; // special case - - int n = 0; - numberOfElements--; - while (numberOfElements > 0) { - numberOfElements = numberOfElements >> 1; - n++; - } - return n; - } - - /** - * Calculates the length of the DNA context for a given base 10 number - * - * It is important to know the length given the base 10 number to calculate the number of combinations - * and to disambiguate the "quasi-canonical" state. - * - * This method also calculates the number of combinations as a by-product, but since it memoizes the - * results, a subsequent call to combinationsFor(length) is O(1). - * - * @param number the base 10 representation of the bitset - * @return the length of the DNA context represented by this number - */ - private static int contextLengthFor(long number) { - int length = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet. - long combinations = combinationsFor(length); // the next context (we advance it so we know which one was preceding it). - while (combinations <= number) { // find the length of the dna string (length) - length++; - combinations = combinationsFor(length); // calculate the next context - } - return length; - } - - /** - * The sum of all combinations of a context of a given length from length = 0 to length. - * - * Memoized implementation of sum(4^i) , where i=[0,length] - * - * @param length the length of the DNA context - * @return the sum of all combinations leading up to this context length. - */ - private static long combinationsFor(int length) { - if (length > MAX_DNA_CONTEXT) - throw new ReviewedStingException(String.format("Context cannot be longer than %d bases but requested %d.", MAX_DNA_CONTEXT, length)); - - // only calculate the number of combinations if the table hasn't already cached the value - if (length > 0 && combinationsPerLength[length] == 0) { - long combinations = 0L; - for (int i = 1; i <= length; i++) - combinations += (1L << 2 * i); // add all combinations with 4^i ( 4^i is the same as 2^(2*i) ) - combinationsPerLength[length] = combinations; - } - return combinationsPerLength[length]; - } - - - public static byte[] sizeOf(Object obj) throws java.io.IOException - { - ByteArrayOutputStream byteObject = new ByteArrayOutputStream(); - ObjectOutputStream objectOutputStream = new ObjectOutputStream(byteObject); - objectOutputStream.writeObject(obj); - objectOutputStream.flush(); - objectOutputStream.close(); - byteObject.close(); - - return byteObject.toByteArray(); - } } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index 137ed1256..5ed759690 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.utils.recalibration; import org.broadinstitute.sting.gatk.walkers.bqsr.*; -import org.broadinstitute.sting.utils.BitSetUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -43,7 +42,7 @@ import java.util.*; public class BaseRecalibration { private QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done) - private LinkedHashMap> keysAndTablesMap; // quick access reference to the read group table and its key manager + private LinkedHashMap> keysAndTablesMap; // quick access reference to the read group table and its key manager private ArrayList requestedCovariates = new ArrayList(); // list of all covariates to be used in this calculation @@ -72,7 +71,7 @@ public class BaseRecalibration { * @param keysAndTablesMap the map of key managers and recalibration tables * @param requestedCovariates the list of requested covariates */ - protected BaseRecalibration(QuantizationInfo quantizationInfo, LinkedHashMap> keysAndTablesMap, ArrayList requestedCovariates) { + protected BaseRecalibration(QuantizationInfo quantizationInfo, LinkedHashMap> keysAndTablesMap, ArrayList requestedCovariates) { this.quantizationInfo = quantizationInfo; this.keysAndTablesMap = keysAndTablesMap; this.requestedCovariates = requestedCovariates; @@ -94,7 +93,7 @@ public class BaseRecalibration { final byte originalQualityScore = quals[offset]; if (originalQualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality) - final BitSet[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model + final Long[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base quals[offset] = recalibratedQualityScore; } @@ -122,23 +121,23 @@ public class BaseRecalibration { * @param errorModel the event type * @return A recalibrated quality score as a byte */ - protected byte performSequentialQualityCalculation(BitSet[] key, EventType errorModel) { + protected byte performSequentialQualityCalculation(Long[] key, EventType errorModel) { final String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code"; final String TOO_MANY_KEYS_EXCEPTION = "There should only be one key for the RG collapsed table, something went wrong here"; - final byte qualFromRead = (byte) BitSetUtils.shortFrom(key[1]); + final byte qualFromRead = (byte)(long)key[1]; double globalDeltaQ = 0.0; double deltaQReported = 0.0; double deltaQCovariates = 0.0; - for (Map.Entry> mapEntry : keysAndTablesMap.entrySet()) { + for (Map.Entry> mapEntry : keysAndTablesMap.entrySet()) { BQSRKeyManager keyManager = mapEntry.getKey(); - Map table = mapEntry.getValue(); + Map table = mapEntry.getValue(); + final List bitKeys = keyManager.longsFromAllKeys(key, errorModel); // calculate the shift in quality due to the read group switch(keyManager.getRequiredCovariates().size()) { case 1: // this is the ReadGroup table - List bitKeys = keyManager.bitSetsFromAllKeys(key, errorModel); // calculate the shift in quality due to the read group if (bitKeys.size() > 1) throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION); @@ -151,7 +150,6 @@ public class BaseRecalibration { break; case 2: if (keyManager.getOptionalCovariates().isEmpty()) { // this is the QualityScore table - bitKeys = keyManager.bitSetsFromAllKeys(key, errorModel); // calculate the shift in quality due to the reported quality score if (bitKeys.size() > 1) throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION); @@ -162,8 +160,7 @@ public class BaseRecalibration { } } else { // this is the table with all the covariates - bitKeys = keyManager.bitSetsFromAllKeys(key, errorModel); // calculate the shift in quality due to each covariate by itself in turn - for (BitSet k : bitKeys) { + for (Long k : bitKeys) { final RecalDatum empiricalQualCO = table.get(k); if (empiricalQualCO != null) { double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManagerUnitTest.java index c65cc3f63..c7591eb25 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManagerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManagerUnitTest.java @@ -81,7 +81,7 @@ public class BQSRKeyManagerUnitTest { } private void runTestOnRead(GATKSAMRecord read, List covariateList, int nRequired) { - final BitSet[][][] covariateKeys = new BitSet[covariateList.size()][EventType.values().length][]; + final Long[][][] covariateKeys = new Long[covariateList.size()][EventType.values().length][]; int i = 0; for (Covariate cov : covariateList) { cov.initialize(RAC); @@ -103,7 +103,7 @@ public class BQSRKeyManagerUnitTest { for (int l = 0; l < read.getReadLength(); l++) { for (int eventType = 0; eventType < EventType.values().length; eventType++) { - BitSet[] keySet = new BitSet[covariateList.size()]; + Long[] keySet = new Long[covariateList.size()]; Object[] expectedRequired = new Object[covariateList.size()]; Object[] expectedCovariate = new Object[covariateList.size()]; @@ -111,14 +111,14 @@ public class BQSRKeyManagerUnitTest { keySet[j] = covariateKeys[j][eventType][l]; if (j < nRequired) - expectedRequired[j] = covariateList.get(j).keyFromBitSet(keySet[j]); + expectedRequired[j] = covariateList.get(j).formatKey(keySet[j]); else - expectedCovariate[j - nRequired] = covariateList.get(j).keyFromBitSet(keySet[j]); + expectedCovariate[j - nRequired] = covariateList.get(j).formatKey(keySet[j]); } - List hashKeys = keyManager.bitSetsFromAllKeys(keySet, EventType.eventFrom(eventType)); + List hashKeys = keyManager.longsFromAllKeys(keySet, EventType.eventFrom(eventType)); short cov = 0; - for (BitSet key : hashKeys) { + for (Long key : hashKeys) { Object[] actual = keyManager.keySetFrom(key).toArray(); // Build the expected array diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java index 5a522e81e..1f5fc6b4f 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java @@ -8,8 +8,6 @@ import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; -import java.util.BitSet; - /** * @author Mauricio Carneiro * @since 3/1/12 @@ -36,9 +34,9 @@ public class ContextCovariateUnitTest { verifyCovariateArray(values.getDeletions(), RAC.DELETIONS_CONTEXT_SIZE, clippedRead, covariate); } - public static void verifyCovariateArray(BitSet[] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) { + public static void verifyCovariateArray(Long[] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) { for (int i = 0; i < values.length; i++) - Assert.assertEquals(contextCovariate.keyFromBitSet(values[i]), expectedContext(read, i, contextSize)); + Assert.assertEquals(contextCovariate.formatKey(values[i]), expectedContext(read, i, contextSize)); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java index dc8e091ba..6634830e2 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java @@ -48,9 +48,9 @@ public class CycleCovariateUnitTest { verifyCovariateArray(values.getMismatches(), (short) -1, (short) -1); } - private void verifyCovariateArray(BitSet[] values, short init, short increment) { + private void verifyCovariateArray(Long[] values, short init, short increment) { for (short i = 0; i < values.length; i++) { - short actual = Short.decode(covariate.keyFromBitSet(values[i])); + short actual = Short.decode(covariate.formatKey(values[i])); int expected = init + (increment * i); // System.out.println(String.format("%d: %d, %d", i, actual, expected)); Assert.assertEquals(actual, expected); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariatesUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariatesUnitTest.java index a74e011c2..61231ea5e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariatesUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariatesUnitTest.java @@ -53,24 +53,24 @@ public class ReadCovariatesUnitTest { for (int i = 0; i < length; i++) { // check that read group is always the same - Assert.assertEquals(rgCov.keyFromBitSet(rc.getMismatchesKeySet(i)[0]), RGID); - Assert.assertEquals(rgCov.keyFromBitSet(rc.getInsertionsKeySet(i)[0]), RGID); - Assert.assertEquals(rgCov.keyFromBitSet(rc.getDeletionsKeySet(i)[0]), RGID); + Assert.assertEquals(rgCov.formatKey(rc.getMismatchesKeySet(i)[0]), RGID); + Assert.assertEquals(rgCov.formatKey(rc.getInsertionsKeySet(i)[0]), RGID); + Assert.assertEquals(rgCov.formatKey(rc.getDeletionsKeySet(i)[0]), RGID); // check quality score - Assert.assertEquals(qsCov.keyFromBitSet(rc.getMismatchesKeySet(i)[1]), "" + mQuals[i]); - Assert.assertEquals(qsCov.keyFromBitSet(rc.getInsertionsKeySet(i)[1]), "" + iQuals[i]); - Assert.assertEquals(qsCov.keyFromBitSet(rc.getDeletionsKeySet(i)[1]), "" + dQuals[i]); + Assert.assertEquals(qsCov.formatKey(rc.getMismatchesKeySet(i)[1]), "" + mQuals[i]); + Assert.assertEquals(qsCov.formatKey(rc.getInsertionsKeySet(i)[1]), "" + iQuals[i]); + Assert.assertEquals(qsCov.formatKey(rc.getDeletionsKeySet(i)[1]), "" + dQuals[i]); // check context - Assert.assertEquals(coCov.keyFromBitSet(rc.getMismatchesKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.MISMATCHES_CONTEXT_SIZE)); - Assert.assertEquals(coCov.keyFromBitSet(rc.getInsertionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INSERTIONS_CONTEXT_SIZE)); - Assert.assertEquals(coCov.keyFromBitSet(rc.getDeletionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.DELETIONS_CONTEXT_SIZE)); + Assert.assertEquals(coCov.formatKey(rc.getMismatchesKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.MISMATCHES_CONTEXT_SIZE)); + Assert.assertEquals(coCov.formatKey(rc.getInsertionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INSERTIONS_CONTEXT_SIZE)); + Assert.assertEquals(coCov.formatKey(rc.getDeletionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.DELETIONS_CONTEXT_SIZE)); // check cycle - Assert.assertEquals(cyCov.keyFromBitSet(rc.getMismatchesKeySet(i)[3]), "" + (i+1)); - Assert.assertEquals(cyCov.keyFromBitSet(rc.getInsertionsKeySet(i)[3]), "" + (i+1)); - Assert.assertEquals(cyCov.keyFromBitSet(rc.getDeletionsKeySet(i)[3]), "" + (i+1)); + Assert.assertEquals(cyCov.formatKey(rc.getMismatchesKeySet(i)[3]), "" + (i+1)); + Assert.assertEquals(cyCov.formatKey(rc.getInsertionsKeySet(i)[3]), "" + (i+1)); + Assert.assertEquals(cyCov.formatKey(rc.getDeletionsKeySet(i)[3]), "" + (i+1)); } }