Refactoring of BQSRv2 to use longs (and standard bit fiddling techniques) instead of Java BitSets for performance improvements.
This commit is contained in:
parent
ff5749599d
commit
891ce51908
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.BitSetUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -33,20 +34,21 @@ public class BQSRKeyManager {
|
|||
private final Map<String, Short> covariateNameToIDMap;
|
||||
|
||||
private int nRequiredBits; // Number of bits used to represent the required covariates
|
||||
private int nOptionalBits; // Number of bits used to represent the standard covaraites
|
||||
private final int nOptionalIDBits; // Number of bits used to represent the optional covariates IDs
|
||||
private final int totalNumberOfBits; // Sum of all of the above plus the event bits
|
||||
|
||||
private final BitSet optionalCovariateMask; // Standard mask for optional covariates bitset
|
||||
private final BitSet optionalCovariateIDMask; // Standard mask for optional covariates order bitset
|
||||
|
||||
|
||||
private final int optionalCovariateOffset;
|
||||
private final int optionalCovariateIDOffset;
|
||||
|
||||
private final long optionalCovariateMask; // Standard mask for optional covariates key
|
||||
private final long optionalCovariateIDMask; // Standard mask for optional covariates order key
|
||||
private final long eventIDMask; // Standard mask for event ID
|
||||
|
||||
/**
|
||||
* Initializes the KeyManager with the total number of covariates to use
|
||||
*
|
||||
* @param requiredCovariates the ordered list of required covariates
|
||||
* @param optionalCovariates the ordered list of optional covariates
|
||||
*/
|
||||
public BQSRKeyManager(List<Covariate> requiredCovariates, List<Covariate> optionalCovariates) {
|
||||
public BQSRKeyManager(final List<Covariate> requiredCovariates, final List<Covariate> optionalCovariates) {
|
||||
this.requiredCovariates = new ArrayList<Covariate>(requiredCovariates);
|
||||
this.optionalCovariates = new ArrayList<Covariate>(optionalCovariates);
|
||||
requiredCovariatesInfo = new ArrayList<RequiredCovariateInfo>(requiredCovariates.size()); // initialize the required covariates list
|
||||
|
|
@ -54,29 +56,35 @@ public class BQSRKeyManager {
|
|||
covariateNameToIDMap = new HashMap<String, Short>(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates)
|
||||
|
||||
nRequiredBits = 0;
|
||||
for (Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management
|
||||
int nBits = required.numberOfBits(); // number of bits used by this covariate
|
||||
BitSet mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
|
||||
requiredCovariatesInfo.add(new RequiredCovariateInfo(nRequiredBits, mask, required)); // Create an object for this required covariate
|
||||
for (final Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management
|
||||
final int nBits = required.numberOfBits(); // number of bits used by this covariate
|
||||
final long mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
|
||||
requiredCovariatesInfo.add(new RequiredCovariateInfo(nBits, nRequiredBits, mask, required)); // Create an object for this required covariate
|
||||
nRequiredBits += nBits;
|
||||
}
|
||||
|
||||
final int bitsInEventType = numberOfBitsToRepresent(EventType.values().length);
|
||||
eventIDMask = genericMask(nRequiredBits, bitsInEventType);
|
||||
|
||||
short id = 0;
|
||||
nOptionalBits = 0;
|
||||
for (Covariate optional : optionalCovariates) {
|
||||
int nBits = optional.numberOfBits(); // number of bits used by this covariate
|
||||
nOptionalBits = Math.max(nOptionalBits, nBits); // optional covariates are represented by the number of bits needed by biggest covariate
|
||||
BitSet optionalID = bitSetFromId(id); // calculate the optional covariate ID for this covariate
|
||||
optionalCovariatesInfo.add(new OptionalCovariateInfo(optionalID, optional)); // optional covariates have standardized mask and number of bits, so no need to store in the RequiredCovariateInfo object
|
||||
String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
|
||||
int nOptionalBits = 0;
|
||||
for (final Covariate optional : optionalCovariates) {
|
||||
nOptionalBits = Math.max(nOptionalBits, optional.numberOfBits()); // optional covariates are represented by the number of bits needed by biggest covariate
|
||||
optionalCovariatesInfo.add(new OptionalCovariateInfo(id, optional));
|
||||
final String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
|
||||
covariateNameToIDMap.put(covariateName, id);
|
||||
id++;
|
||||
}
|
||||
|
||||
nOptionalIDBits = BitSetUtils.numberOfBitsToRepresent(optionalCovariates.size()); // number of bits used to represent the covariate ID
|
||||
optionalCovariateMask = genericMask(nRequiredBits, nOptionalBits); // the generic mask to extract optional covariate bits from the combined bitset
|
||||
optionalCovariateIDMask = genericMask(nRequiredBits + nOptionalBits, nOptionalIDBits); // the generic mask to extract optional covariate ID bits from the combined bitset
|
||||
totalNumberOfBits = nRequiredBits + nOptionalBits + nOptionalIDBits + bitsInEventType(); // total number of bits used in the final key
|
||||
optionalCovariateOffset = nRequiredBits + bitsInEventType;
|
||||
optionalCovariateMask = genericMask(optionalCovariateOffset, nOptionalBits); // the generic mask to extract optional covariate bits from the combined bitset
|
||||
optionalCovariateIDOffset = nRequiredBits + bitsInEventType + nOptionalBits;
|
||||
final int nOptionalIDBits = numberOfBitsToRepresent(optionalCovariates.size()); // number of bits used to represent the covariate ID
|
||||
optionalCovariateIDMask = genericMask(optionalCovariateIDOffset, nOptionalIDBits); // the generic mask to extract optional covariate ID bits from the combined bitset
|
||||
|
||||
final int totalNumberOfBits = optionalCovariateIDOffset + nOptionalIDBits; // total number of bits used in the final key
|
||||
if ( totalNumberOfBits > 64 )
|
||||
throw new UserException.BadInput("The total number of bits used for the master BQSR key is greater than 64 and cannot be represented in a Long");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -93,80 +101,78 @@ public class BQSRKeyManager {
|
|||
*
|
||||
* Note: If there are no optional covariates, only one bitset key will be returned with all the required covariates and the event type
|
||||
*
|
||||
* @param allKeys The keys in bitset representation for each covariate
|
||||
* @param allKeys The keys in long representation for each covariate
|
||||
* @param eventType The type of event described by this keyset (e.g. mismatches, insertions, deletions)
|
||||
* @return one key in bitset representation per covariate
|
||||
* @return one key in long representation per covariate
|
||||
*/
|
||||
public List<BitSet> bitSetsFromAllKeys(BitSet[] allKeys, EventType eventType) {
|
||||
List<BitSet> allBitSets = new ArrayList<BitSet>(); // Generate one key per optional covariate
|
||||
|
||||
BitSet eventBitSet = bitSetFromEvent(eventType); // create a bitset with the event type
|
||||
int eventTypeBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits; // Location in the bit set to add the event type bits
|
||||
public List<Long> longsFromAllKeys(Long[] allKeys, EventType eventType) {
|
||||
final List<Long> allFinalKeys = new ArrayList<Long>(); // Generate one key per optional covariate
|
||||
|
||||
int covariateIndex = 0;
|
||||
BitSet requiredKey = new BitSet(nRequiredBits); // This will be a bitset holding all the required keys, to replicate later on
|
||||
long masterKey = 0L; // This will be a master key holding all the required keys, to replicate later on
|
||||
for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo)
|
||||
addBitSetToKeyAtLocation(requiredKey, allKeys[covariateIndex++], infoRequired.bitsBefore); // Add all the required covariates to the key set
|
||||
masterKey |= (allKeys[covariateIndex++] << infoRequired.offset);
|
||||
|
||||
final long eventKey = keyFromEvent(eventType); // create a key for the event type
|
||||
masterKey |= (eventKey << nRequiredBits);
|
||||
|
||||
for (OptionalCovariateInfo infoOptional : optionalCovariatesInfo) {
|
||||
BitSet covariateKey = allKeys[covariateIndex++]; // get the bitset from all keys
|
||||
final Long covariateKey = allKeys[covariateIndex++];
|
||||
if (covariateKey == null)
|
||||
continue; // do not add nulls to the final set of keys.
|
||||
|
||||
BitSet optionalKey = new BitSet(totalNumberOfBits); // create a new key for this optional covariate
|
||||
optionalKey.or(requiredKey); // import all the required covariates
|
||||
addBitSetToKeyAtLocation(optionalKey, covariateKey, nRequiredBits); // add the optional covariate right after the required covariates
|
||||
addBitSetToKeyAtLocation(optionalKey, infoOptional.covariateID, nRequiredBits + nOptionalBits); // add the optional covariate ID right after the optional covarite
|
||||
addBitSetToKeyAtLocation(optionalKey, eventBitSet, eventTypeBitIndex); // Add the event type
|
||||
allBitSets.add(optionalKey); // add this key to the list of keys
|
||||
long newKey = masterKey | (covariateKey << optionalCovariateOffset);
|
||||
newKey |= (infoOptional.covariateID << optionalCovariateIDOffset);
|
||||
|
||||
if ( newKey < 0 )
|
||||
System.out.println("*** " + newKey);
|
||||
|
||||
allFinalKeys.add(newKey); // add this key to the list of keys
|
||||
}
|
||||
|
||||
if (optionalCovariatesInfo.size() == 0) { // special case when we have no optional covariates, add the event type to the required key (our only key)
|
||||
addBitSetToKeyAtLocation(requiredKey, eventBitSet, eventTypeBitIndex); // Add the event type
|
||||
allBitSets.add(requiredKey); // add this key to the list of keys
|
||||
}
|
||||
if (optionalCovariatesInfo.size() == 0) // special case when we have no optional covariates
|
||||
allFinalKeys.add(masterKey);
|
||||
|
||||
return allBitSets;
|
||||
return allFinalKeys;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates one bitset key for the covariates represented in Object[] key
|
||||
* Generates one key for the covariates represented in Object[] key
|
||||
*
|
||||
* The covariates will have the actual objects produced by the covariates (probably read from the recalibration data file)
|
||||
* and will contain all required covariates and one (or none) optional covariates. Therefore, the product is one bitset key, not many.
|
||||
* and will contain all required covariates and one (or none) optional covariates. Therefore, the product is one key, not many.
|
||||
*
|
||||
* Example key:
|
||||
* RG, QUAL, CYCLE, CYCLE_ID, EventType
|
||||
*
|
||||
* @param key list of objects produced by the required covariates followed by one or zero optional covariates.
|
||||
* @return a bitset key representing these objects. Bitset encryption is done using the covariate's interface.
|
||||
* @return a key representing these objects.
|
||||
*/
|
||||
public BitSet bitSetFromKey(Object[] key) {
|
||||
BitSet bitSetKey = new BitSet(totalNumberOfBits);
|
||||
|
||||
public Long longFromKey(Object[] key) {
|
||||
int requiredCovariate = 0;
|
||||
for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo) {
|
||||
BitSet covariateBitSet = infoRequired.covariate.bitSetFromKey(key[requiredCovariate++]); // create a bitset from the object key provided using the required covariate's interface
|
||||
addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, infoRequired.bitsBefore); // add it to the bitset key
|
||||
}
|
||||
|
||||
if (optionalCovariatesInfo.size() > 0) {
|
||||
int optionalCovariate = requiredCovariatesInfo.size(); // the optional covariate index in the key array
|
||||
int covariateIDIndex = optionalCovariate + 1; // the optional covariate ID index is right after the optional covariate's
|
||||
int covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index
|
||||
OptionalCovariateInfo infoOptional = optionalCovariatesInfo.get(covariateID); // so we can get the optional covariate information
|
||||
|
||||
BitSet covariateBitSet = infoOptional.covariate.bitSetFromKey(key[optionalCovariate]); // convert the optional covariate key into a bitset using the covariate's interface
|
||||
addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, nRequiredBits); // add the optional covariate right after the required covariates
|
||||
addBitSetToKeyAtLocation(bitSetKey, infoOptional.covariateID, nRequiredBits + nOptionalBits); // add the optional covariate ID right after the optional covarite
|
||||
}
|
||||
|
||||
int eventIndex = key.length - 1; // the event type is always the last key
|
||||
int eventTypeBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits; // location in the bit set to add the event type bits
|
||||
BitSet eventBitSet = bitSetFromEvent((EventType) key[eventIndex]); // get the bit set representation of the event type
|
||||
addBitSetToKeyAtLocation(bitSetKey, eventBitSet, eventTypeBitIndex); // add the event type
|
||||
long masterKey = 0L; // This will be a master key holding all the required keys, to replicate later on
|
||||
for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo)
|
||||
masterKey |= (infoRequired.covariate.longFromKey(key[requiredCovariate++]) << infoRequired.offset);
|
||||
|
||||
return bitSetKey;
|
||||
final int eventIndex = key.length - 1; // the event type is always the last key
|
||||
final long eventKey = keyFromEvent((EventType) key[eventIndex]); // create a key for the event type
|
||||
masterKey |= (eventKey << nRequiredBits);
|
||||
|
||||
if (optionalCovariatesInfo.size() > 0) {
|
||||
final int covariateIndex = requiredCovariatesInfo.size(); // the optional covariate index in the key array
|
||||
final int covariateIDIndex = covariateIndex + 1; // the optional covariate ID index is right after the optional covariate's
|
||||
final short covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index
|
||||
final OptionalCovariateInfo infoOptional = optionalCovariatesInfo.get(covariateID); // so we can get the optional covariate information
|
||||
|
||||
final long covariateKey = infoOptional.covariate.longFromKey(key[covariateIndex]); // convert the optional covariate key into a bitset using the covariate's interface
|
||||
masterKey |= (covariateKey << optionalCovariateOffset);
|
||||
masterKey |= (infoOptional.covariateID << optionalCovariateIDOffset);
|
||||
}
|
||||
|
||||
if ( masterKey < 0 )
|
||||
System.out.println("*** " + masterKey);
|
||||
|
||||
return masterKey;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -176,34 +182,34 @@ public class BQSRKeyManager {
|
|||
* @param id the string or short representation of the optional covariate id
|
||||
* @return the short representation of the optional covariate id.
|
||||
*/
|
||||
private short parseCovariateID(Object id) {
|
||||
private short parseCovariateID(final Object id) {
|
||||
return (id instanceof String) ? covariateNameToIDMap.get(id.toString()) : (Short) id;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a key set of objects from a combined bitset key.
|
||||
* Generates a key set of objects from a combined master key.
|
||||
*
|
||||
* Masks out each covariate independently and decodes their values (Object) into a keyset
|
||||
*
|
||||
* @param key the bitset representation of the keys
|
||||
* @param master the master representation of the keys
|
||||
* @return an object array with the values for each key
|
||||
*/
|
||||
public List<Object> keySetFrom(BitSet key) {
|
||||
List<Object> objectKeys = new ArrayList<Object>();
|
||||
public List<Object> keySetFrom(final Long master) {
|
||||
final List<Object> objectKeys = new ArrayList<Object>();
|
||||
for (RequiredCovariateInfo info : requiredCovariatesInfo) {
|
||||
BitSet covariateBitSet = extractBitSetFromKey(key, info.mask, info.bitsBefore); // get the covariate's bitset
|
||||
objectKeys.add(info.covariate.keyFromBitSet(covariateBitSet)); // convert the bitset to object using covariate's interface
|
||||
final Long covariateKey = extractKeyFromMaster(master, info.mask, info.offset); // get the covariate's key
|
||||
objectKeys.add(info.covariate.formatKey(covariateKey)); // convert the key to object using covariate's interface
|
||||
}
|
||||
|
||||
if (optionalCovariatesInfo.size() > 0) {
|
||||
BitSet covBitSet = extractBitSetFromKey(key, optionalCovariateMask, nRequiredBits); // mask out the covariate bit set
|
||||
BitSet idbs = extractBitSetFromKey(key, optionalCovariateIDMask, nRequiredBits + nOptionalBits); // mask out the covariate order (to identify which covariate this is)
|
||||
short id = BitSetUtils.shortFrom(idbs); // covert the id bitset into a short
|
||||
Covariate covariate = optionalCovariatesInfo.get(id).covariate; // get the corresponding optional covariate object
|
||||
objectKeys.add(covariate.keyFromBitSet(covBitSet)); // add the optional covariate to the key set
|
||||
final Long covKey = extractKeyFromMaster(master, optionalCovariateMask, optionalCovariateOffset); // get the covariate's key
|
||||
final int covIDKey = (int)extractKeyFromMaster(master, optionalCovariateIDMask, optionalCovariateIDOffset); // get the covariate's id (to identify which covariate this is)
|
||||
Covariate covariate = optionalCovariatesInfo.get((short)covIDKey).covariate; // get the corresponding optional covariate object
|
||||
objectKeys.add(covariate.formatKey(covKey)); // add the optional covariate key to the key set
|
||||
objectKeys.add(covariate.getClass().getSimpleName().split("Covariate")[0]); // add the covariate name using the id
|
||||
}
|
||||
objectKeys.add(eventFromBitSet(key)); // add the event type object to the key set
|
||||
|
||||
objectKeys.add(EventType.eventFrom((int)extractKeyFromMaster(master, eventIDMask, nRequiredBits))); // add the event type object to the key set
|
||||
|
||||
return objectKeys;
|
||||
}
|
||||
|
|
@ -217,77 +223,35 @@ public class BQSRKeyManager {
|
|||
}
|
||||
|
||||
/**
|
||||
* Translates a masked bitset into a bitset starting at 0
|
||||
* Creates a mask for the requested covariate to extract the relevant key from a combined master key
|
||||
*
|
||||
* @param key the masked out bitset
|
||||
* @param n the number of bits to chop
|
||||
* @return a translated bitset starting at 0 for the covariate machinery to decode
|
||||
* @param offset the offset into the master key
|
||||
* @param nBits the number of bits needed by the Covariate to represent its values
|
||||
* @return the mask relevant to the covariate
|
||||
*/
|
||||
private BitSet chopNBitsFrom(BitSet key, int n) {
|
||||
BitSet choppedKey = new BitSet();
|
||||
for (int i = key.nextSetBit(0); i >= 0; i = key.nextSetBit(i + 1))
|
||||
choppedKey.set(i - n); // Set every bit translocated to the beginning of the BitSet
|
||||
return choppedKey;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a mask for the requested covariate to extract the relevant bitset from a combined bitset key
|
||||
*
|
||||
* @param leadingBits the index of the covariate in the ordered covariate list
|
||||
* @param nBits the number of bits needed by the Covariate to represent its values in BitSet form
|
||||
* @return the bitset relevant to the covariate
|
||||
*/
|
||||
|
||||
private BitSet genericMask(int leadingBits, int nBits) {
|
||||
BitSet mask = new BitSet(leadingBits + nBits);
|
||||
mask.set(leadingBits, leadingBits + nBits);
|
||||
private long genericMask(final int offset, final int nBits) {
|
||||
long mask = 0L;
|
||||
for ( int i = 0; i < nBits; i++ )
|
||||
mask |= 1L << (offset+i);
|
||||
return mask;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes the event type (enum) from the full bitset key
|
||||
*
|
||||
* @param fullKey the full key of all covariates + event type
|
||||
* @return the decoded event type.
|
||||
*/
|
||||
private EventType eventFromBitSet(BitSet fullKey) {
|
||||
BitSet eventKey = new BitSet();
|
||||
int firstBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits;
|
||||
for (int i = fullKey.nextSetBit(firstBitIndex); i >= 0; i = fullKey.nextSetBit(i + 1))
|
||||
eventKey.set(i - firstBitIndex);
|
||||
return EventType.eventFrom(BitSetUtils.shortFrom(eventKey));
|
||||
private long extractKeyFromMaster(final long master, final long mask, final int offset) {
|
||||
long key = master & mask;
|
||||
return key >> offset;
|
||||
}
|
||||
|
||||
// cache the BitSet representing an event since it's otherwise created a massive amount of times
|
||||
private static final Map<EventType, BitSet> eventTypeCache = new HashMap<EventType, BitSet>(EventType.values().length);
|
||||
// cache the key representing an event since it's otherwise created a massive amount of times
|
||||
private static final Map<EventType, Long> eventTypeCache = new HashMap<EventType, Long>(EventType.values().length); // event IDs must be longs so that bit-fiddling works
|
||||
static {
|
||||
for (final EventType eventType : EventType.values())
|
||||
eventTypeCache.put(eventType, BitSetUtils.bitSetFrom(eventType.index));
|
||||
eventTypeCache.put(eventType, (long)eventType.index);
|
||||
}
|
||||
|
||||
private BitSet bitSetFromEvent(final EventType eventType) {
|
||||
private Long keyFromEvent(final EventType eventType) {
|
||||
return eventTypeCache.get(eventType);
|
||||
}
|
||||
|
||||
private BitSet bitSetFromId(final short id) {
|
||||
return BitSetUtils.bitSetFrom(id);
|
||||
}
|
||||
|
||||
private int bitsInEventType() {
|
||||
return BitSetUtils.numberOfBitsToRepresent(EventType.values().length);
|
||||
}
|
||||
|
||||
private void addBitSetToKeyAtLocation(BitSet key, BitSet bitSet, int location) {
|
||||
for (int j = bitSet.nextSetBit(0); j >= 0; j = bitSet.nextSetBit(j + 1))
|
||||
key.set(j + location); // translate the bits set in the key to their corresponding position in the full key
|
||||
}
|
||||
|
||||
private BitSet extractBitSetFromKey (BitSet key, BitSet mask, int leadingBits) {
|
||||
BitSet bitSet = (BitSet) key.clone();
|
||||
bitSet.and(mask);
|
||||
return chopNBitsFrom(bitSet, leadingBits);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (!(o instanceof BQSRKeyManager))
|
||||
|
|
@ -322,27 +286,50 @@ public class BQSRKeyManager {
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the number of bits necessary to represent a given number of elements
|
||||
*
|
||||
* @param numberOfElements the number of elements to represent (must be positive)
|
||||
* @return the number of bits necessary to represent this many elements
|
||||
*/
|
||||
public static int numberOfBitsToRepresent(long numberOfElements) {
|
||||
if (numberOfElements < 0)
|
||||
throw new ReviewedStingException("Number of elements must be positive: " + numberOfElements);
|
||||
|
||||
if (numberOfElements == 1L)
|
||||
return 1; // special case
|
||||
|
||||
int n = 0;
|
||||
numberOfElements--;
|
||||
while (numberOfElements > 0) {
|
||||
numberOfElements = numberOfElements >> 1;
|
||||
n++;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
/**
|
||||
* Aggregate information for each Covariate
|
||||
*/
|
||||
class RequiredCovariateInfo {
|
||||
public final int bitsBefore; // number of bits before this covariate in the combined bitset key
|
||||
public final BitSet mask; // the mask to pull out this covariate from the combined bitset key ( a mask made from bitsBefore and nBits )
|
||||
private static class RequiredCovariateInfo {
|
||||
public final int nBits; // number of bits for this key
|
||||
public final int offset; // the offset into the master key
|
||||
public final long mask; // the mask to pull out this covariate from the combined bitset key ( a mask made from bitsBefore and nBits )
|
||||
public final Covariate covariate; // this allows reverse lookup of the Covariates in order
|
||||
|
||||
RequiredCovariateInfo(int bitsBefore, BitSet mask, Covariate covariate) {
|
||||
this.bitsBefore = bitsBefore;
|
||||
RequiredCovariateInfo(final int nBits, final int offset, final long mask, final Covariate covariate) {
|
||||
this.nBits = nBits;
|
||||
this.offset = offset;
|
||||
this.mask = mask;
|
||||
this.covariate = covariate;
|
||||
}
|
||||
}
|
||||
|
||||
class OptionalCovariateInfo {
|
||||
public final BitSet covariateID; // cache the covariate ID
|
||||
private static class OptionalCovariateInfo {
|
||||
public final long covariateID; // cache the covariate ID (must be a long so that bit-fiddling works)
|
||||
public final Covariate covariate;
|
||||
|
||||
OptionalCovariateInfo(BitSet covariateID, Covariate covariate) {
|
||||
OptionalCovariateInfo(final long covariateID, final Covariate covariate) {
|
||||
this.covariateID = covariateID;
|
||||
this.covariate = covariate;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,14 +26,13 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.BitSetUtils;
|
||||
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -65,12 +64,12 @@ public class ContextCovariate implements StandardCovariate {
|
|||
|
||||
@Override
|
||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||
int l = read.getReadLength();
|
||||
BitSet[] mismatches = new BitSet[l];
|
||||
BitSet[] insertions = new BitSet[l];
|
||||
BitSet[] deletions = new BitSet[l];
|
||||
final int l = read.getReadLength();
|
||||
final Long[] mismatches = new Long[l];
|
||||
final Long[] insertions = new Long[l];
|
||||
final Long[] deletions = new Long[l];
|
||||
|
||||
GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context
|
||||
final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context
|
||||
|
||||
final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag();
|
||||
byte[] bases = clippedRead.getReadBases();
|
||||
|
|
@ -98,21 +97,21 @@ public class ContextCovariate implements StandardCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String keyFromBitSet(BitSet key) {
|
||||
public String formatKey(final Long key) {
|
||||
if (key == null) // this can only happen in test routines because we do not propagate null keys to the csv file
|
||||
return null;
|
||||
|
||||
return BitSetUtils.dnaFrom(key);
|
||||
return contextFromKey(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BitSet bitSetFromKey(Object key) {
|
||||
return BitSetUtils.bitSetFrom((String) key);
|
||||
public Long longFromKey(Object key) {
|
||||
return longFrom((String) key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numberOfBits() {
|
||||
return Long.bitCount(-1L);
|
||||
return Integer.bitCount(1);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -121,14 +120,14 @@ public class ContextCovariate implements StandardCovariate {
|
|||
* @param bases the bases in the read to build the context from
|
||||
* @param offset the position in the read to calculate the context for
|
||||
* @param contextSize context size to use building the context
|
||||
* @return the bitSet representing the Context
|
||||
* @return the key representing the context
|
||||
*/
|
||||
private BitSet contextWith(byte[] bases, int offset, int contextSize) {
|
||||
BitSet result = null;
|
||||
private Long contextWith(byte[] bases, int offset, int contextSize) {
|
||||
Long result = null;
|
||||
if (offset - contextSize + 1 >= 0) {
|
||||
final byte[] context = Arrays.copyOfRange(bases, offset - contextSize + 1, offset + 1);
|
||||
if (!BaseUtils.containsBase(context, BaseUtils.N))
|
||||
result = BitSetUtils.bitSetFrom(context);
|
||||
result = keyFromContext(context);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
@ -146,4 +145,125 @@ public class ContextCovariate implements StandardCovariate {
|
|||
array[r] = temp;
|
||||
}
|
||||
}
|
||||
|
||||
static final private int MAX_DNA_CONTEXT = 31; // the maximum context size (number of bases) permitted in the "long bitset" implementation of the DNA <=> BitSet conversion.
|
||||
static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1]; // keeps the memoized table with the number of combinations for each given DNA context length
|
||||
|
||||
public static Long longFrom(final String dna) {
|
||||
return keyFromContext(dna.getBytes());
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Long representation of a given dna string.
|
||||
*
|
||||
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
|
||||
* be longer than 31 bases.
|
||||
*
|
||||
* The bit representation of a dna string is the simple:
|
||||
* 0 A 4 AA 8 CA
|
||||
* 1 C 5 AC ...
|
||||
* 2 G 6 AG 1343 TTGGT
|
||||
* 3 T 7 AT 1364 TTTTT
|
||||
*
|
||||
* To convert from dna to number, we convert the dna string to base10 and add all combinations that
|
||||
* preceded the string (with smaller lengths).
|
||||
*
|
||||
* @param dna the dna sequence
|
||||
* @return the key representing the dna sequence
|
||||
*/
|
||||
public static Long keyFromContext(final byte[] dna) {
|
||||
if (dna.length > MAX_DNA_CONTEXT)
|
||||
throw new ReviewedStingException(String.format("DNA Length cannot be bigger than %d. dna: %s (%d)", MAX_DNA_CONTEXT, dna, dna.length));
|
||||
|
||||
final long preContext = combinationsFor(dna.length - 1); // the sum of all combinations that preceded the length of the dna string
|
||||
long baseTen = 0L; // the number in base_10 that we are going to use to generate the bit set
|
||||
for (final byte base : dna) {
|
||||
baseTen = baseTen << 2; // multiply by 4
|
||||
baseTen += (long)BaseUtils.simpleBaseToBaseIndex(base);
|
||||
}
|
||||
return baseTen + preContext; // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
|
||||
}
|
||||
|
||||
/**
|
||||
* The sum of all combinations of a context of a given length from length = 0 to length.
|
||||
*
|
||||
* Memoized implementation of sum(4^i) , where i=[0,length]
|
||||
*
|
||||
* @param length the length of the DNA context
|
||||
* @return the sum of all combinations leading up to this context length.
|
||||
*/
|
||||
private static long combinationsFor(int length) {
|
||||
if (length > MAX_DNA_CONTEXT)
|
||||
throw new ReviewedStingException(String.format("Context cannot be longer than %d bases but requested %d.", MAX_DNA_CONTEXT, length));
|
||||
|
||||
// only calculate the number of combinations if the table hasn't already cached the value
|
||||
if (length > 0 && combinationsPerLength[length] == 0) {
|
||||
long combinations = 0L;
|
||||
for (int i = 1; i <= length; i++)
|
||||
combinations += (1L << 2 * i); // add all combinations with 4^i ( 4^i is the same as 2^(2*i) )
|
||||
combinationsPerLength[length] = combinations;
|
||||
}
|
||||
return combinationsPerLength[length];
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a key into the dna string representation.
|
||||
*
|
||||
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
|
||||
* be longer than 31 bases.
|
||||
*
|
||||
* We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the
|
||||
* base_10 representation of the sequence. This is important for us to know how to bring the number
|
||||
* to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented
|
||||
* as 0's and leading 0's are omitted).
|
||||
*
|
||||
* quasi-canonical because A is represented by a 0, therefore,
|
||||
* instead of : 0, 1, 2, 3, 10, 11, 12, ...
|
||||
* we have : 0, 1, 2, 3, 00, 01, 02, ...
|
||||
*
|
||||
* but we can correctly decode it because we know the final length.
|
||||
*
|
||||
* @param key the key representing the dna sequence
|
||||
* @return the dna sequence represented by the key
|
||||
*/
|
||||
public static String contextFromKey(Long key) {
|
||||
if (key < 0)
|
||||
throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");
|
||||
|
||||
final int length = contextLengthFor(key); // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls)
|
||||
key -= combinationsFor(length - 1); // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
|
||||
|
||||
StringBuilder dna = new StringBuilder();
|
||||
while (key > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical)
|
||||
final byte base = (byte) (key & 3); // equivalent to (key % 4)
|
||||
dna.append((char)BaseUtils.baseIndexToSimpleBase(base));
|
||||
key = key >> 2; // divide by 4
|
||||
}
|
||||
for (int j = dna.length(); j < length; j++)
|
||||
dna.append('A'); // add leading A's as necessary (due to the "quasi" canonical status, see description above)
|
||||
|
||||
return dna.reverse().toString(); // make sure to reverse the string since we should have been pre-pending all along
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the length of the DNA context for a given base 10 number
|
||||
*
|
||||
* It is important to know the length given the base 10 number to calculate the number of combinations
|
||||
* and to disambiguate the "quasi-canonical" state.
|
||||
*
|
||||
* This method also calculates the number of combinations as a by-product, but since it memoizes the
|
||||
* results, a subsequent call to combinationsFor(length) is O(1).
|
||||
*
|
||||
* @param number the base 10 representation of the key
|
||||
* @return the length of the DNA context represented by this number
|
||||
*/
|
||||
private static int contextLengthFor(final long number) {
|
||||
int length = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
|
||||
long combinations = combinationsFor(length); // the next context (we advance it so we know which one was preceding it).
|
||||
while (combinations <= number) { // find the length of the dna string (length)
|
||||
length++;
|
||||
combinations = combinationsFor(length); // calculate the next context
|
||||
}
|
||||
return length;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,8 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
|
|
@ -45,7 +43,7 @@ public interface Covariate {
|
|||
*
|
||||
* @param RAC the recalibration argument collection
|
||||
*/
|
||||
public void initialize(RecalibrationArgumentCollection RAC);
|
||||
public void initialize(final RecalibrationArgumentCollection RAC);
|
||||
|
||||
/**
|
||||
* Calculates covariate values for all positions in the read.
|
||||
|
|
@ -53,7 +51,7 @@ public interface Covariate {
|
|||
* @param read the read to calculate the covariates on.
|
||||
* @return all the covariate values for every base in the read.
|
||||
*/
|
||||
public CovariateValues getValues(GATKSAMRecord read);
|
||||
public CovariateValues getValues(final GATKSAMRecord read);
|
||||
|
||||
/**
|
||||
* Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
|
|
@ -61,26 +59,26 @@ public interface Covariate {
|
|||
* @param str the key in string type (read from the csv)
|
||||
* @return the key in it's correct type.
|
||||
*/
|
||||
public Object getValue(String str);
|
||||
public Object getValue(final String str);
|
||||
|
||||
/**
|
||||
* Converts the bitset representation of the key (used internally for table indexing) to String format for file output.
|
||||
* Converts the internal representation of the key to String format for file output.
|
||||
*
|
||||
* @param key the bitset representation of the key
|
||||
* @param key the Long representation of the key
|
||||
* @return a string representation of the key
|
||||
*/
|
||||
public String keyFromBitSet(BitSet key);
|
||||
public String formatKey(final Long key);
|
||||
|
||||
/**
|
||||
* Converts a key into a bitset
|
||||
* Converts an Object key into a Long key using only the lowest numberOfBits() bits
|
||||
*
|
||||
* Only necessary for on-the-fly recalibration when you have the object, but need to store it in memory in bitset format. For counting covariates
|
||||
* the getValues method already returns all values in BitSet format.
|
||||
* Only necessary for on-the-fly recalibration when you have the object, but need to store it in memory in long format. For counting covariates
|
||||
* the getValues method already returns all values in Long format.
|
||||
*
|
||||
* @param key the object corresponding to the covariate
|
||||
* @return a bitset representation of the object
|
||||
* @return a long representation of the object
|
||||
*/
|
||||
public BitSet bitSetFromKey(Object key);
|
||||
public Long longFromKey(final Object key);
|
||||
|
||||
/**
|
||||
* Each covariate should determine how many bits are necessary to encode it's data
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* An object to hold the different covariate values for all bases in the read.
|
||||
*
|
||||
|
|
@ -14,25 +12,25 @@ import java.util.BitSet;
|
|||
* @since 2/8/12
|
||||
*/
|
||||
public class CovariateValues {
|
||||
private final BitSet[] mismatches;
|
||||
private final BitSet[] insertions;
|
||||
private final BitSet[] deletions;
|
||||
private final Long[] mismatches;
|
||||
private final Long[] insertions;
|
||||
private final Long[] deletions;
|
||||
|
||||
public CovariateValues(BitSet[] mismatch, BitSet[] insertion, BitSet[] deletion) {
|
||||
public CovariateValues(Long[] mismatch, Long[] insertion, Long[] deletion) {
|
||||
this.mismatches = mismatch;
|
||||
this.insertions = insertion;
|
||||
this.deletions = deletion;
|
||||
}
|
||||
|
||||
public BitSet[] getMismatches() {
|
||||
public Long[] getMismatches() {
|
||||
return mismatches;
|
||||
}
|
||||
|
||||
public BitSet[] getInsertions() {
|
||||
public Long[] getInsertions() {
|
||||
return insertions;
|
||||
}
|
||||
|
||||
public BitSet[] getDeletions() {
|
||||
public Long[] getDeletions() {
|
||||
return deletions;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ import org.broadinstitute.sting.utils.NGSPlatform;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.EnumSet;
|
||||
|
||||
/*
|
||||
|
|
@ -61,7 +60,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
// Used to pick out the covariate's value from attributes of the read
|
||||
@Override
|
||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||
BitSet[] cycles = new BitSet[read.getReadLength()];
|
||||
Long[] cycles = new Long[read.getReadLength()];
|
||||
final NGSPlatform ngsPlatform = read.getNGSPlatform();
|
||||
|
||||
// Discrete cycle platforms
|
||||
|
|
@ -81,7 +80,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
final int CUSHION = 4;
|
||||
final int MAX_CYCLE = read.getReadLength() - CUSHION - 1;
|
||||
for (int i = 0; i < MAX_CYCLE; i++) {
|
||||
cycles[i] = (i<CUSHION || i>MAX_CYCLE) ? null : BitSetUtils.bitSetFrom(cycle);
|
||||
cycles[i] = (i<CUSHION || i>MAX_CYCLE) ? null : keyFromCycle(cycle);
|
||||
cycle += increment;
|
||||
}
|
||||
}
|
||||
|
|
@ -108,19 +107,19 @@ public class CycleCovariate implements StandardCovariate {
|
|||
int iii = 0;
|
||||
while (iii < readLength) {
|
||||
while (iii < readLength && bases[iii] == (byte) 'T') {
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
cycles[iii] = keyFromCycle(cycle);
|
||||
iii++;
|
||||
}
|
||||
while (iii < readLength && bases[iii] == (byte) 'A') {
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
cycles[iii] = keyFromCycle(cycle);
|
||||
iii++;
|
||||
}
|
||||
while (iii < readLength && bases[iii] == (byte) 'C') {
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
cycles[iii] = keyFromCycle(cycle);
|
||||
iii++;
|
||||
}
|
||||
while (iii < readLength && bases[iii] == (byte) 'G') {
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
cycles[iii] = keyFromCycle(cycle);
|
||||
iii++;
|
||||
}
|
||||
if (iii < readLength) {
|
||||
|
|
@ -130,7 +129,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
cycle++;
|
||||
}
|
||||
if (iii < readLength && !BaseUtils.isRegularBase(bases[iii])) {
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
cycles[iii] = keyFromCycle(cycle);
|
||||
iii++;
|
||||
}
|
||||
|
||||
|
|
@ -140,19 +139,19 @@ public class CycleCovariate implements StandardCovariate {
|
|||
int iii = readLength - 1;
|
||||
while (iii >= 0) {
|
||||
while (iii >= 0 && bases[iii] == (byte) 'T') {
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
cycles[iii] = keyFromCycle(cycle);
|
||||
iii--;
|
||||
}
|
||||
while (iii >= 0 && bases[iii] == (byte) 'A') {
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
cycles[iii] = keyFromCycle(cycle);
|
||||
iii--;
|
||||
}
|
||||
while (iii >= 0 && bases[iii] == (byte) 'C') {
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
cycles[iii] = keyFromCycle(cycle);
|
||||
iii--;
|
||||
}
|
||||
while (iii >= 0 && bases[iii] == (byte) 'G') {
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
cycles[iii] = keyFromCycle(cycle);
|
||||
iii--;
|
||||
}
|
||||
if (iii >= 0) {
|
||||
|
|
@ -162,7 +161,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
cycle++;
|
||||
}
|
||||
if (iii >= 0 && !BaseUtils.isRegularBase(bases[iii])) {
|
||||
cycles[iii] = BitSetUtils.bitSetFrom(cycle);
|
||||
cycles[iii] = keyFromCycle(cycle);
|
||||
iii--;
|
||||
}
|
||||
}
|
||||
|
|
@ -184,17 +183,29 @@ public class CycleCovariate implements StandardCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String keyFromBitSet(BitSet key) {
|
||||
return String.format("%d", BitSetUtils.shortFrom(key));
|
||||
public String formatKey(final Long key) {
|
||||
long cycle = key >> 1; // shift so we can remove the "sign" bit
|
||||
if ( (key & 1) != 0 ) // is the last bit set?
|
||||
cycle *= -1; // then the cycle is negative
|
||||
return String.format("%d", cycle);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BitSet bitSetFromKey(Object key) {
|
||||
return (key instanceof String) ? BitSetUtils.bitSetFrom(Short.parseShort((String) key)) : BitSetUtils.bitSetFrom((Short) key);
|
||||
public Long longFromKey(final Object key) {
|
||||
return (key instanceof String) ? keyFromCycle(Short.parseShort((String) key)) : keyFromCycle((Short) key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numberOfBits() {
|
||||
return BitSetUtils.numberOfBitsToRepresent(2 * Short.MAX_VALUE); // positive and negative
|
||||
return BQSRKeyManager.numberOfBitsToRepresent(2 * Short.MAX_VALUE); // positive and negative
|
||||
}
|
||||
|
||||
private static Long keyFromCycle(final short cycle) {
|
||||
// no negative values because
|
||||
long result = Math.abs(cycle);
|
||||
result = result << 1; // shift so we can add the "sign" bit
|
||||
if ( cycle < 0 )
|
||||
result++; // negative cycles get the lower-most bit set
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,11 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.BitSetUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
|
|
@ -50,18 +47,18 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
|||
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||
int readLength = read.getReadLength();
|
||||
|
||||
BitSet[] mismatches = new BitSet[readLength];
|
||||
BitSet[] insertions = new BitSet[readLength];
|
||||
BitSet[] deletions = new BitSet[readLength];
|
||||
Long[] mismatches = new Long[readLength];
|
||||
Long[] insertions = new Long[readLength];
|
||||
Long[] deletions = new Long[readLength];
|
||||
|
||||
byte[] baseQualities = read.getBaseQualities();
|
||||
byte[] baseInsertionQualities = read.getBaseInsertionQualities();
|
||||
byte[] baseDeletionQualities = read.getBaseDeletionQualities();
|
||||
|
||||
for (int i = 0; i < baseQualities.length; i++) {
|
||||
mismatches[i] = BitSetUtils.bitSetFrom(baseQualities[i]);
|
||||
insertions[i] = BitSetUtils.bitSetFrom(baseInsertionQualities[i]);
|
||||
deletions[i] = BitSetUtils.bitSetFrom(baseDeletionQualities[i]);
|
||||
mismatches[i] = (long)baseQualities[i];
|
||||
insertions[i] = (long)baseInsertionQualities[i];
|
||||
deletions[i] = (long)baseDeletionQualities[i];
|
||||
}
|
||||
|
||||
return new CovariateValues(mismatches, insertions, deletions);
|
||||
|
|
@ -74,17 +71,17 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String keyFromBitSet(BitSet key) {
|
||||
return String.format("%d", BitSetUtils.longFrom(key));
|
||||
public String formatKey(final Long key) {
|
||||
return String.format("%d", key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BitSet bitSetFromKey(Object key) {
|
||||
return (key instanceof String) ? BitSetUtils.bitSetFrom(Byte.parseByte((String) key)) : BitSetUtils.bitSetFrom((Byte) key);
|
||||
public Long longFromKey(final Object key) {
|
||||
return (key instanceof String) ? (long)Byte.parseByte((String) key) : (long)(Byte) key;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numberOfBits() {
|
||||
return BitSetUtils.numberOfBitsToRepresent(QualityUtils.MAX_QUAL_SCORE);
|
||||
return BQSRKeyManager.numberOfBitsToRepresent(QualityUtils.MAX_QUAL_SCORE);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.recalibration.QualQuantizer;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.BitSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
|
|
@ -31,13 +30,13 @@ public class QuantizationInfo {
|
|||
this(quantizedQuals, empiricalQualCounts, calculateQuantizationLevels(quantizedQuals));
|
||||
}
|
||||
|
||||
public QuantizationInfo(Map<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap, int quantizationLevels) {
|
||||
public QuantizationInfo(Map<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap, int quantizationLevels) {
|
||||
final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution
|
||||
for (int i = 0; i < qualHistogram.length; i++)
|
||||
qualHistogram[i] = 0L;
|
||||
|
||||
Map<BitSet, RecalDatum> qualTable = null; // look for the quality score table
|
||||
for (Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
|
||||
Map<Long, RecalDatum> qualTable = null; // look for the quality score table
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
|
||||
BQSRKeyManager keyManager = entry.getKey();
|
||||
if (keyManager.getRequiredCovariates().size() == 2) // it should be the only one with 2 required covaraites
|
||||
qualTable = entry.getValue();
|
||||
|
|
|
|||
|
|
@ -2,8 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* The object temporarily held by a read that describes all of it's covariates.
|
||||
*
|
||||
|
|
@ -13,16 +11,16 @@ import java.util.BitSet;
|
|||
* @since 2/8/12
|
||||
*/
|
||||
public class ReadCovariates {
|
||||
private final BitSet[][] mismatchesKeySet;
|
||||
private final BitSet[][] insertionsKeySet;
|
||||
private final BitSet[][] deletionsKeySet;
|
||||
private final Long[][] mismatchesKeySet;
|
||||
private final Long[][] insertionsKeySet;
|
||||
private final Long[][] deletionsKeySet;
|
||||
|
||||
private int nextCovariateIndex;
|
||||
|
||||
public ReadCovariates(int readLength, int numberOfCovariates) {
|
||||
this.mismatchesKeySet = new BitSet[readLength][numberOfCovariates];
|
||||
this.insertionsKeySet = new BitSet[readLength][numberOfCovariates];
|
||||
this.deletionsKeySet = new BitSet[readLength][numberOfCovariates];
|
||||
this.mismatchesKeySet = new Long[readLength][numberOfCovariates];
|
||||
this.insertionsKeySet = new Long[readLength][numberOfCovariates];
|
||||
this.deletionsKeySet = new Long[readLength][numberOfCovariates];
|
||||
this.nextCovariateIndex = 0;
|
||||
}
|
||||
|
||||
|
|
@ -33,7 +31,7 @@ public class ReadCovariates {
|
|||
nextCovariateIndex++;
|
||||
}
|
||||
|
||||
public BitSet[] getKeySet(final int readPosition, final EventType errorModel) {
|
||||
public Long[] getKeySet(final int readPosition, final EventType errorModel) {
|
||||
switch (errorModel) {
|
||||
case BASE_SUBSTITUTION:
|
||||
return getMismatchesKeySet(readPosition);
|
||||
|
|
@ -46,19 +44,19 @@ public class ReadCovariates {
|
|||
}
|
||||
}
|
||||
|
||||
public BitSet[] getMismatchesKeySet(int readPosition) {
|
||||
public Long[] getMismatchesKeySet(int readPosition) {
|
||||
return mismatchesKeySet[readPosition];
|
||||
}
|
||||
|
||||
public BitSet[] getInsertionsKeySet(int readPosition) {
|
||||
public Long[] getInsertionsKeySet(int readPosition) {
|
||||
return insertionsKeySet[readPosition];
|
||||
}
|
||||
|
||||
public BitSet[] getDeletionsKeySet(int readPosition) {
|
||||
public Long[] getDeletionsKeySet(int readPosition) {
|
||||
return deletionsKeySet[readPosition];
|
||||
}
|
||||
|
||||
private void transposeCovariateValues(BitSet[][] keySet, BitSet[] covariateValues) {
|
||||
private void transposeCovariateValues(Long[][] keySet, Long[] covariateValues) {
|
||||
for (int i = 0; i < covariateValues.length; i++)
|
||||
keySet[i][nextCovariateIndex] = covariateValues[i];
|
||||
}
|
||||
|
|
@ -66,15 +64,15 @@ public class ReadCovariates {
|
|||
/**
|
||||
* Testing routines
|
||||
*/
|
||||
protected BitSet[][] getMismatchesKeySet() {
|
||||
protected Long[][] getMismatchesKeySet() {
|
||||
return mismatchesKeySet;
|
||||
}
|
||||
|
||||
protected BitSet[][] getInsertionsKeySet() {
|
||||
protected Long[][] getInsertionsKeySet() {
|
||||
return insertionsKeySet;
|
||||
}
|
||||
|
||||
protected BitSet[][] getDeletionsKeySet() {
|
||||
protected Long[][] getDeletionsKeySet() {
|
||||
return deletionsKeySet;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,11 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.BitSetUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.BitSet;
|
||||
import java.util.HashMap;
|
||||
|
||||
/*
|
||||
|
|
@ -43,22 +41,21 @@ import java.util.HashMap;
|
|||
|
||||
public class ReadGroupCovariate implements RequiredCovariate {
|
||||
|
||||
private final HashMap<String, Short> readGroupLookupTable = new HashMap<String, Short>();
|
||||
private final HashMap<Short, String> readGroupReverseLookupTable = new HashMap<Short, String>();
|
||||
private short nextId = 0;
|
||||
private final HashMap<String, Long> readGroupLookupTable = new HashMap<String, Long>();
|
||||
private final HashMap<Long, String> readGroupReverseLookupTable = new HashMap<Long, String>();
|
||||
private long nextId = 0L;
|
||||
|
||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
@Override
|
||||
public void initialize(final RecalibrationArgumentCollection RAC) {
|
||||
}
|
||||
public void initialize(final RecalibrationArgumentCollection RAC) {}
|
||||
|
||||
@Override
|
||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||
final int l = read.getReadLength();
|
||||
final String readGroupId = readGroupValueFromRG(read.getReadGroup());
|
||||
BitSet rg = bitSetForReadGroup(readGroupId); // All objects must output a BitSet, so we convert the "compressed" representation of the Read Group into a bitset
|
||||
BitSet[] readGroups = new BitSet[l];
|
||||
Arrays.fill(readGroups, rg);
|
||||
final long key = keyForReadGroup(readGroupId);
|
||||
Long[] readGroups = new Long[l];
|
||||
Arrays.fill(readGroups, key);
|
||||
return new CovariateValues(readGroups, readGroups, readGroups);
|
||||
}
|
||||
|
||||
|
|
@ -68,35 +65,28 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String keyFromBitSet(BitSet key) {
|
||||
return decodeReadGroup((short) BitSetUtils.longFrom(key));
|
||||
public String formatKey(final Long key) {
|
||||
return readGroupReverseLookupTable.get(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BitSet bitSetFromKey(Object key) {
|
||||
return bitSetForReadGroup((String) key);
|
||||
public Long longFromKey(Object key) {
|
||||
return keyForReadGroup((String) key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numberOfBits() {
|
||||
return BitSetUtils.numberOfBitsToRepresent(Short.MAX_VALUE);
|
||||
return BQSRKeyManager.numberOfBitsToRepresent(Short.MAX_VALUE);
|
||||
}
|
||||
|
||||
private String decodeReadGroup(final short id) {
|
||||
return readGroupReverseLookupTable.get(id);
|
||||
}
|
||||
|
||||
private BitSet bitSetForReadGroup(String readGroupId) {
|
||||
short shortId;
|
||||
if (readGroupLookupTable.containsKey(readGroupId))
|
||||
shortId = readGroupLookupTable.get(readGroupId);
|
||||
else {
|
||||
shortId = nextId;
|
||||
private Long keyForReadGroup(final String readGroupId) {
|
||||
if (!readGroupLookupTable.containsKey(readGroupId)) {
|
||||
readGroupLookupTable.put(readGroupId, nextId);
|
||||
readGroupReverseLookupTable.put(nextId, readGroupId);
|
||||
nextId++;
|
||||
}
|
||||
return BitSetUtils.bitSetFrom(shortId);
|
||||
}
|
||||
|
||||
return readGroupLookupTable.get(readGroupId);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -105,8 +95,8 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
|||
* @param rg the read group record
|
||||
* @return platform unit or readgroup id
|
||||
*/
|
||||
private String readGroupValueFromRG(GATKSAMReadGroupRecord rg) {
|
||||
String platformUnit = rg.getPlatformUnit();
|
||||
private String readGroupValueFromRG(final GATKSAMReadGroupRecord rg) {
|
||||
final String platformUnit = rg.getPlatformUnit();
|
||||
return platformUnit == null ? rg.getId() : platformUnit;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -149,17 +149,17 @@ public class RecalDataManager {
|
|||
* @param optionalCovariates list of optional covariates (in order)
|
||||
* @return a map with each key manager and it's corresponding recalibration table properly initialized
|
||||
*/
|
||||
public static LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> initializeTables(ArrayList<Covariate> requiredCovariates, ArrayList<Covariate> optionalCovariates) {
|
||||
final LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> tablesAndKeysMap = new LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>>();
|
||||
ArrayList<Covariate> requiredCovariatesToAdd = new ArrayList<Covariate>(requiredCovariates.size() + 1); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates.
|
||||
ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
|
||||
public static LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> initializeTables(ArrayList<Covariate> requiredCovariates, ArrayList<Covariate> optionalCovariates) {
|
||||
final LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> tablesAndKeysMap = new LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>>();
|
||||
final ArrayList<Covariate> requiredCovariatesToAdd = new ArrayList<Covariate>(requiredCovariates.size() + 1); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates.
|
||||
final ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
|
||||
for (Covariate covariate : requiredCovariates) {
|
||||
requiredCovariatesToAdd.add(covariate);
|
||||
final Map<BitSet, RecalDatum> recalTable = new HashMap<BitSet, RecalDatum>(); // initializing a new recal table for each required covariate (cumulatively)
|
||||
final Map<Long, RecalDatum> recalTable = new HashMap<Long, RecalDatum>(); // initializing a new recal table for each required covariate (cumulatively)
|
||||
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
|
||||
tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
|
||||
}
|
||||
final Map<BitSet, RecalDatum> recalTable = new HashMap<BitSet, RecalDatum>(Short.MAX_VALUE); // initializing a new recal table to hold all optional covariates
|
||||
final Map<Long, RecalDatum> recalTable = new HashMap<Long, RecalDatum>(Short.MAX_VALUE); // initializing a new recal table to hold all optional covariates
|
||||
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager
|
||||
tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
|
||||
return tablesAndKeysMap;
|
||||
|
|
@ -181,7 +181,7 @@ public class RecalDataManager {
|
|||
final List<Class<? extends RequiredCovariate>> requiredClasses = new PluginManager<RequiredCovariate>(RequiredCovariate.class).getPlugins();
|
||||
final List<Class<? extends StandardCovariate>> standardClasses = new PluginManager<StandardCovariate>(StandardCovariate.class).getPlugins();
|
||||
|
||||
ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates
|
||||
final ArrayList<Covariate> requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates
|
||||
ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>();
|
||||
if (argumentCollection.USE_STANDARD_COVARIATES)
|
||||
optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user
|
||||
|
|
@ -223,7 +223,7 @@ public class RecalDataManager {
|
|||
logger.info("");
|
||||
}
|
||||
|
||||
private static List<GATKReportTable> generateReportTables(Map<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap) {
|
||||
private static List<GATKReportTable> generateReportTables(Map<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap) {
|
||||
List<GATKReportTable> result = new LinkedList<GATKReportTable>();
|
||||
int tableIndex = 0;
|
||||
|
||||
|
|
@ -235,19 +235,19 @@ public class RecalDataManager {
|
|||
final Pair<String, String> nObservations = new Pair<String, String>(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d");
|
||||
final Pair<String, String> nErrors = new Pair<String, String>(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME, "%d");
|
||||
|
||||
for (Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
|
||||
BQSRKeyManager keyManager = entry.getKey();
|
||||
Map<BitSet, RecalDatum> recalTable = entry.getValue();
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
|
||||
final BQSRKeyManager keyManager = entry.getKey();
|
||||
final Map<Long, RecalDatum> recalTable = entry.getValue();
|
||||
|
||||
boolean isReadGroupTable = tableIndex == 0; // special case for the read group table so we can print the extra column it needs.
|
||||
final boolean isReadGroupTable = tableIndex == 0; // special case for the read group table so we can print the extra column it needs.
|
||||
|
||||
List<Covariate> requiredList = keyManager.getRequiredCovariates(); // ask the key manager what required covariates were used in this recal table
|
||||
List<Covariate> optionalList = keyManager.getOptionalCovariates(); // ask the key manager what optional covariates were used in this recal table
|
||||
final List<Covariate> requiredList = keyManager.getRequiredCovariates(); // ask the key manager what required covariates were used in this recal table
|
||||
final List<Covariate> optionalList = keyManager.getOptionalCovariates(); // ask the key manager what optional covariates were used in this recal table
|
||||
|
||||
ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
|
||||
final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
|
||||
|
||||
for (Covariate covariate : requiredList) {
|
||||
String name = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the covariate names and put them in order
|
||||
for (final Covariate covariate : requiredList) {
|
||||
final String name = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the covariate names and put them in order
|
||||
columnNames.add(new Pair<String,String>(name, "%s")); // save the required covariate name so we can reference it in the future
|
||||
}
|
||||
|
||||
|
|
@ -263,30 +263,30 @@ public class RecalDataManager {
|
|||
columnNames.add(nObservations);
|
||||
columnNames.add(nErrors);
|
||||
|
||||
GATKReportTable reportTable = new GATKReportTable("RecalTable" + tableIndex++, "", columnNames.size());
|
||||
for (Pair<String, String> columnName : columnNames)
|
||||
final GATKReportTable reportTable = new GATKReportTable("RecalTable" + tableIndex++, "", columnNames.size());
|
||||
for (final Pair<String, String> columnName : columnNames)
|
||||
reportTable.addColumn(columnName.getFirst(), columnName.getSecond()); // every table must have the event type
|
||||
|
||||
int rowIndex = 0;
|
||||
|
||||
for (Map.Entry<BitSet, RecalDatum> recalTableEntry : recalTable.entrySet()) { // create a map with column name => key value for all covariate keys
|
||||
BitSet bitSetKey = recalTableEntry.getKey();
|
||||
Map<String, Object> columnData = new HashMap<String, Object>(columnNames.size());
|
||||
Iterator<Pair<String, String>> iterator = columnNames.iterator();
|
||||
for (Object key : keyManager.keySetFrom(bitSetKey)) {
|
||||
String columnName = iterator.next().getFirst();
|
||||
for (Map.Entry<Long, RecalDatum> recalTableEntry : recalTable.entrySet()) { // create a map with column name => key value for all covariate keys
|
||||
final Long bitSetKey = recalTableEntry.getKey();
|
||||
final Map<String, Object> columnData = new HashMap<String, Object>(columnNames.size());
|
||||
final Iterator<Pair<String, String>> iterator = columnNames.iterator();
|
||||
for (final Object key : keyManager.keySetFrom(bitSetKey)) {
|
||||
final String columnName = iterator.next().getFirst();
|
||||
columnData.put(columnName, key);
|
||||
}
|
||||
RecalDatum datum = recalTableEntry.getValue();
|
||||
final RecalDatum datum = recalTableEntry.getValue();
|
||||
columnData.put(iterator.next().getFirst(), datum.getEmpiricalQuality());
|
||||
if (isReadGroupTable)
|
||||
columnData.put(iterator.next().getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
|
||||
columnData.put(iterator.next().getFirst(), datum.numObservations);
|
||||
columnData.put(iterator.next().getFirst(), datum.numMismatches);
|
||||
|
||||
for (Map.Entry<String, Object> dataEntry : columnData.entrySet()) {
|
||||
String columnName = dataEntry.getKey();
|
||||
Object value = dataEntry.getValue();
|
||||
for (final Map.Entry<String, Object> dataEntry : columnData.entrySet()) {
|
||||
final String columnName = dataEntry.getKey();
|
||||
final Object value = dataEntry.getValue();
|
||||
reportTable.set(rowIndex, columnName, value.toString());
|
||||
}
|
||||
rowIndex++;
|
||||
|
|
@ -296,16 +296,16 @@ public class RecalDataManager {
|
|||
return result;
|
||||
}
|
||||
|
||||
public static void outputRecalibrationReport(RecalibrationArgumentCollection RAC, QuantizationInfo quantizationInfo, Map<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap, PrintStream outputFile) {
|
||||
public static void outputRecalibrationReport(RecalibrationArgumentCollection RAC, QuantizationInfo quantizationInfo, Map<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap, PrintStream outputFile) {
|
||||
outputRecalibrationReport(RAC.generateReportTable(), quantizationInfo.generateReportTable(), generateReportTables(keysAndTablesMap), outputFile);
|
||||
}
|
||||
|
||||
public static void outputRecalibrationReport(GATKReportTable argumentTable, QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager,Map<BitSet, RecalDatum>> keysAndTablesMap, PrintStream outputFile) {
|
||||
public static void outputRecalibrationReport(GATKReportTable argumentTable, QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager,Map<Long, RecalDatum>> keysAndTablesMap, PrintStream outputFile) {
|
||||
outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(keysAndTablesMap), outputFile);
|
||||
}
|
||||
|
||||
private static void outputRecalibrationReport(GATKReportTable argumentTable, GATKReportTable quantizationTable, List<GATKReportTable> recalTables, PrintStream outputFile) {
|
||||
GATKReport report = new GATKReport();
|
||||
final GATKReport report = new GATKReport();
|
||||
report.addTable(argumentTable);
|
||||
report.addTable(quantizationTable);
|
||||
report.addTables(recalTables);
|
||||
|
|
@ -328,7 +328,7 @@ public class RecalDataManager {
|
|||
final File plotFileName = new File(csvFileName + ".pdf");
|
||||
files.getFirst().close();
|
||||
|
||||
RScriptExecutor executor = new RScriptExecutor();
|
||||
final RScriptExecutor executor = new RScriptExecutor();
|
||||
executor.addScript(new Resource(SCRIPT_FILE, RecalDataManager.class));
|
||||
executor.addArgs(csvFileName.getAbsolutePath());
|
||||
executor.addArgs(plotFileName.getAbsolutePath());
|
||||
|
|
@ -340,32 +340,32 @@ public class RecalDataManager {
|
|||
|
||||
}
|
||||
|
||||
public static void generateRecalibrationPlot(File filename, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> original, boolean keepIntermediates) {
|
||||
Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
|
||||
public static void generateRecalibrationPlot(File filename, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> original, boolean keepIntermediates) {
|
||||
final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
|
||||
writeCSV(files.getFirst(), original, "ORIGINAL", true);
|
||||
outputRecalibrationPlot(files, keepIntermediates);
|
||||
}
|
||||
|
||||
public static void generateRecalibrationPlot(File filename, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> original, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> recalibrated, boolean keepIntermediates) {
|
||||
Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
|
||||
public static void generateRecalibrationPlot(File filename, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> original, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> recalibrated, boolean keepIntermediates) {
|
||||
final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
|
||||
writeCSV(files.getFirst(), recalibrated, "RECALIBRATED", true);
|
||||
writeCSV(files.getFirst(), original, "ORIGINAL", false);
|
||||
outputRecalibrationPlot(files, keepIntermediates);
|
||||
}
|
||||
|
||||
private static void writeCSV(PrintStream deltaTableFile, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> map, String recalibrationMode, boolean printHeader) {
|
||||
private static void writeCSV(PrintStream deltaTableFile, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> map, String recalibrationMode, boolean printHeader) {
|
||||
final int QUALITY_SCORE_COVARIATE_INDEX = 1;
|
||||
final Map<BitSet, RecalDatum> deltaTable = new HashMap<BitSet, RecalDatum>();
|
||||
final Map<Long, RecalDatum> deltaTable = new HashMap<Long, RecalDatum>();
|
||||
BQSRKeyManager deltaKeyManager = null;
|
||||
|
||||
|
||||
for (Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> tableEntry : map.entrySet()) {
|
||||
BQSRKeyManager keyManager = tableEntry.getKey();
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> tableEntry : map.entrySet()) {
|
||||
final BQSRKeyManager keyManager = tableEntry.getKey();
|
||||
|
||||
if (keyManager.getOptionalCovariates().size() > 0) { // initialize with the 'all covariates' table
|
||||
// create a key manager for the delta table
|
||||
final List<Covariate> requiredCovariates = Arrays.asList(keyManager.getRequiredCovariates().get(0)); // include the read group covariate as the only required covariate
|
||||
List<Covariate> optionalCovariates = new ArrayList<Covariate>();
|
||||
final List<Covariate> optionalCovariates = new ArrayList<Covariate>();
|
||||
optionalCovariates.add(keyManager.getRequiredCovariates().get(1)); // include the quality score covariate as an optional covariate
|
||||
optionalCovariates.addAll(keyManager.getOptionalCovariates()); // include all optional covariates
|
||||
deltaKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initialize the key manager
|
||||
|
|
@ -376,37 +376,37 @@ public class RecalDataManager {
|
|||
throw new ReviewedStingException ("Couldn't find the covariates table");
|
||||
|
||||
boolean readyToPrint = false;
|
||||
for (Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> tableEntry : map.entrySet()) {
|
||||
BQSRKeyManager keyManager = tableEntry.getKey();
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> tableEntry : map.entrySet()) {
|
||||
final BQSRKeyManager keyManager = tableEntry.getKey();
|
||||
|
||||
if (keyManager.getRequiredCovariates().size() == 2 && keyManager.getOptionalCovariates().isEmpty()) { // look for the QualityScore table
|
||||
Map<BitSet, RecalDatum> table = tableEntry.getValue();
|
||||
final Map<Long, RecalDatum> table = tableEntry.getValue();
|
||||
|
||||
// add the quality score table to the delta table
|
||||
for (Map.Entry<BitSet, RecalDatum> entry : table.entrySet()) { // go through every element in the covariates table to create the delta table
|
||||
RecalDatum recalDatum = entry.getValue(); // the current element (recal datum)
|
||||
for (final Map.Entry<Long, RecalDatum> entry : table.entrySet()) { // go through every element in the covariates table to create the delta table
|
||||
final RecalDatum recalDatum = entry.getValue(); // the current element (recal datum)
|
||||
|
||||
List<Object> covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key
|
||||
List<Object> newCovs = new ArrayList<Object>(4);
|
||||
final List<Object> covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key
|
||||
final List<Object> newCovs = new ArrayList<Object>(4);
|
||||
newCovs.add(0, covs.get(0)); // replace the covariate value with the quality score
|
||||
newCovs.add(1, covs.get(1));
|
||||
newCovs.add(2, "QualityScore"); // replace the covariate name with QualityScore (for the QualityScore covariate)
|
||||
newCovs.add(3, covs.get(2));
|
||||
BitSet deltaKey = deltaKeyManager.bitSetFromKey(newCovs.toArray()); // create a new bitset key for the delta table
|
||||
final Long deltaKey = deltaKeyManager.longFromKey(newCovs.toArray()); // create a new bitset key for the delta table
|
||||
addToDeltaTable(deltaTable, deltaKey, recalDatum); // add this covariate to the delta table
|
||||
}
|
||||
}
|
||||
|
||||
else if (keyManager.getOptionalCovariates().size() > 0) { // look for the optional covariates table
|
||||
Map<BitSet, RecalDatum> table = tableEntry.getValue();
|
||||
final Map<Long, RecalDatum> table = tableEntry.getValue();
|
||||
|
||||
// add the optional covariates to the delta table
|
||||
for (Map.Entry<BitSet, RecalDatum> entry : table.entrySet()) { // go through every element in the covariates table to create the delta table
|
||||
RecalDatum recalDatum = entry.getValue(); // the current element (recal datum)
|
||||
for (final Map.Entry<Long, RecalDatum> entry : table.entrySet()) { // go through every element in the covariates table to create the delta table
|
||||
final RecalDatum recalDatum = entry.getValue(); // the current element (recal datum)
|
||||
|
||||
List<Object> covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key
|
||||
final List<Object> covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key
|
||||
covs.remove(QUALITY_SCORE_COVARIATE_INDEX); // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS)
|
||||
BitSet deltaKey = deltaKeyManager.bitSetFromKey(covs.toArray()); // create a new bitset key for the delta table
|
||||
final Long deltaKey = deltaKeyManager.longFromKey(covs.toArray()); // create a new bitset key for the delta table
|
||||
addToDeltaTable(deltaTable, deltaKey, recalDatum); // add this covariate to the delta table
|
||||
}
|
||||
readyToPrint = true;
|
||||
|
|
@ -416,7 +416,7 @@ public class RecalDataManager {
|
|||
if (readyToPrint) {
|
||||
|
||||
if (printHeader) {
|
||||
List<String> header = new LinkedList<String>();
|
||||
final List<String> header = new LinkedList<String>();
|
||||
header.add("ReadGroup");
|
||||
header.add("CovariateValue");
|
||||
header.add("CovariateName");
|
||||
|
|
@ -431,9 +431,9 @@ public class RecalDataManager {
|
|||
}
|
||||
|
||||
// print each data line
|
||||
for(Map.Entry<BitSet, RecalDatum> deltaEntry : deltaTable.entrySet()) {
|
||||
List<Object> deltaKeys = deltaKeyManager.keySetFrom(deltaEntry.getKey());
|
||||
RecalDatum deltaDatum = deltaEntry.getValue();
|
||||
for (final Map.Entry<Long, RecalDatum> deltaEntry : deltaTable.entrySet()) {
|
||||
final List<Object> deltaKeys = deltaKeyManager.keySetFrom(deltaEntry.getKey());
|
||||
final RecalDatum deltaDatum = deltaEntry.getValue();
|
||||
deltaTableFile.print(Utils.join(",", deltaKeys));
|
||||
deltaTableFile.print("," + deltaDatum.stringForCSV());
|
||||
deltaTableFile.println("," + recalibrationMode);
|
||||
|
|
@ -453,8 +453,8 @@ public class RecalDataManager {
|
|||
* @param deltaKey the key to the table
|
||||
* @param recalDatum the recal datum to combine with the accuracyDatum element in the table
|
||||
*/
|
||||
private static void addToDeltaTable(Map<BitSet, RecalDatum> deltaTable, BitSet deltaKey, RecalDatum recalDatum) {
|
||||
RecalDatum deltaDatum = deltaTable.get(deltaKey); // check if we already have a RecalDatum for this key
|
||||
private static void addToDeltaTable(Map<Long, RecalDatum> deltaTable, Long deltaKey, RecalDatum recalDatum) {
|
||||
final RecalDatum deltaDatum = deltaTable.get(deltaKey); // check if we already have a RecalDatum for this key
|
||||
if (deltaDatum == null)
|
||||
deltaTable.put(deltaKey, new RecalDatum(recalDatum)); // if we don't have a key yet, create a new one with the same values as the curent datum
|
||||
else
|
||||
|
|
|
|||
|
|
@ -18,8 +18,8 @@ import java.util.*;
|
|||
*/
|
||||
public class RecalibrationReport {
|
||||
private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done)
|
||||
private final LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
|
||||
private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation
|
||||
private final LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
|
||||
private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation
|
||||
|
||||
private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
|
||||
private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter
|
||||
|
|
@ -42,15 +42,15 @@ public class RecalibrationReport {
|
|||
for (Covariate cov : requestedCovariates)
|
||||
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
|
||||
|
||||
keysAndTablesMap = new LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>>();
|
||||
ArrayList<Covariate> requiredCovariatesToAdd = new ArrayList<Covariate>(requiredCovariates.size()); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates.
|
||||
ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
|
||||
keysAndTablesMap = new LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>>();
|
||||
ArrayList<Covariate> requiredCovariatesToAdd = new ArrayList<Covariate>(requiredCovariates.size()); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates.
|
||||
ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
|
||||
for (Covariate covariate : requiredCovariates) {
|
||||
requiredCovariatesToAdd.add(covariate);
|
||||
final Map<BitSet, RecalDatum> table; // initializing a new recal table for each required covariate (cumulatively)
|
||||
final Map<Long, RecalDatum> table; // initializing a new recal table for each required covariate (cumulatively)
|
||||
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
|
||||
|
||||
int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES)
|
||||
final int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES)
|
||||
final String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check.";
|
||||
if (nRequiredCovariates == 1) { // if there is only one required covariate, this is the read group table
|
||||
final GATKReportTable reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE);
|
||||
|
|
@ -69,11 +69,11 @@ public class RecalibrationReport {
|
|||
|
||||
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager
|
||||
final GATKReportTable reportTable = report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE);
|
||||
final Map<BitSet, RecalDatum> table = parseAllCovariatesTable(keyManager, reportTable);
|
||||
final Map<Long, RecalDatum> table = parseAllCovariatesTable(keyManager, reportTable);
|
||||
keysAndTablesMap.put(keyManager, table);
|
||||
}
|
||||
|
||||
protected RecalibrationReport(QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap, GATKReportTable argumentTable, RecalibrationArgumentCollection RAC) {
|
||||
protected RecalibrationReport(QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap, GATKReportTable argumentTable, RecalibrationArgumentCollection RAC) {
|
||||
this.quantizationInfo = quantizationInfo;
|
||||
this.keysAndTablesMap = keysAndTablesMap;
|
||||
this.argumentTable = argumentTable;
|
||||
|
|
@ -94,25 +94,25 @@ public class RecalibrationReport {
|
|||
* @param other the recalibration report to combine with this one
|
||||
*/
|
||||
public void combine(RecalibrationReport other) {
|
||||
Iterator<Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>>> thisIterator = keysAndTablesMap.entrySet().iterator();
|
||||
Iterator<Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>>> thisIterator = keysAndTablesMap.entrySet().iterator();
|
||||
|
||||
for (Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> otherEntry : other.getKeysAndTablesMap().entrySet()) {
|
||||
Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> thisEntry = thisIterator.next();
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> otherEntry : other.getKeysAndTablesMap().entrySet()) {
|
||||
Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> thisEntry = thisIterator.next();
|
||||
|
||||
Map<BitSet, RecalDatum> thisTable = thisEntry.getValue();
|
||||
BQSRKeyManager thisKeyManager = thisEntry.getKey();
|
||||
BQSRKeyManager otherKeyManager = otherEntry.getKey();
|
||||
final Map<Long, RecalDatum> thisTable = thisEntry.getValue();
|
||||
final BQSRKeyManager thisKeyManager = thisEntry.getKey();
|
||||
final BQSRKeyManager otherKeyManager = otherEntry.getKey();
|
||||
|
||||
for (Map.Entry<BitSet, RecalDatum> otherTableEntry : otherEntry.getValue().entrySet()) {
|
||||
RecalDatum otherDatum = otherTableEntry.getValue();
|
||||
BitSet otherBitKey = otherTableEntry.getKey();
|
||||
List<Object> otherObjectKey = otherKeyManager.keySetFrom(otherBitKey);
|
||||
for (Map.Entry<Long, RecalDatum> otherTableEntry : otherEntry.getValue().entrySet()) {
|
||||
final RecalDatum otherDatum = otherTableEntry.getValue();
|
||||
final Long otherBitKey = otherTableEntry.getKey();
|
||||
final List<Object> otherObjectKey = otherKeyManager.keySetFrom(otherBitKey);
|
||||
|
||||
BitSet thisBitKey = thisKeyManager.bitSetFromKey(otherObjectKey.toArray());
|
||||
RecalDatum thisDatum = thisTable.get(thisBitKey);
|
||||
final Long thisKey = thisKeyManager.longFromKey(otherObjectKey.toArray());
|
||||
final RecalDatum thisDatum = thisTable.get(thisKey);
|
||||
|
||||
if (thisDatum == null)
|
||||
thisTable.put(thisBitKey, otherDatum);
|
||||
thisTable.put(thisKey, otherDatum);
|
||||
else
|
||||
thisDatum.combine(otherDatum);
|
||||
}
|
||||
|
|
@ -123,7 +123,7 @@ public class RecalibrationReport {
|
|||
return quantizationInfo;
|
||||
}
|
||||
|
||||
public LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> getKeysAndTablesMap() {
|
||||
public LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> getKeysAndTablesMap() {
|
||||
return keysAndTablesMap;
|
||||
}
|
||||
|
||||
|
|
@ -138,7 +138,7 @@ public class RecalibrationReport {
|
|||
* @param reportTable the GATKReport table containing data for this table
|
||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
||||
*/
|
||||
private Map<BitSet, RecalDatum> parseAllCovariatesTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
||||
private Map<Long, RecalDatum> parseAllCovariatesTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
||||
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(5);
|
||||
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
|
||||
columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
|
||||
|
|
@ -155,7 +155,7 @@ public class RecalibrationReport {
|
|||
* @param reportTable the GATKReport table containing data for this table
|
||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
||||
*/
|
||||
private Map<BitSet, RecalDatum> parseQualityScoreTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
||||
private Map<Long, RecalDatum> parseQualityScoreTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
||||
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(3);
|
||||
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
|
||||
columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
|
||||
|
|
@ -170,7 +170,7 @@ public class RecalibrationReport {
|
|||
* @param reportTable the GATKReport table containing data for this table
|
||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
||||
*/
|
||||
private Map<BitSet, RecalDatum> parseReadGroupTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
||||
private Map<Long, RecalDatum> parseReadGroupTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
||||
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(2);
|
||||
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
|
||||
columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME);
|
||||
|
|
@ -185,26 +185,26 @@ public class RecalibrationReport {
|
|||
* @param columnNamesOrderedList a list of columns to read from the report table and build as key for this particular table
|
||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
||||
*/
|
||||
private Map<BitSet, RecalDatum> genericRecalTableParsing(BQSRKeyManager keyManager, GATKReportTable reportTable, ArrayList<String> columnNamesOrderedList, boolean hasEstimatedQReportedColumn) {
|
||||
Map<BitSet, RecalDatum> result = new HashMap<BitSet, RecalDatum>(reportTable.getNumRows()*2);
|
||||
private Map<Long, RecalDatum> genericRecalTableParsing(BQSRKeyManager keyManager, GATKReportTable reportTable, ArrayList<String> columnNamesOrderedList, boolean hasEstimatedQReportedColumn) {
|
||||
final Map<Long, RecalDatum> result = new HashMap<Long, RecalDatum>(reportTable.getNumRows()*2);
|
||||
|
||||
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
|
||||
int nKeys = columnNamesOrderedList.size();
|
||||
Object [] keySet = new Object[nKeys];
|
||||
final int nKeys = columnNamesOrderedList.size();
|
||||
final Object [] keySet = new Object[nKeys];
|
||||
for (int j = 0; j < nKeys; j++)
|
||||
keySet[j] = reportTable.get(i, columnNamesOrderedList.get(j)); // all these objects are okay in String format, the key manager will handle them correctly (except for the event type (see below)
|
||||
keySet[j] = reportTable.get(i, columnNamesOrderedList.get(j)); // all these objects are okay in String format, the key manager will handle them correctly (except for the event type (see below)
|
||||
keySet[keySet.length-1] = EventType.eventFrom((String) keySet[keySet.length-1]); // the last key is always the event type. We convert the string ("M", "I" or "D") to an enum object (necessary for the key manager).
|
||||
BitSet bitKey = keyManager.bitSetFromKey(keySet);
|
||||
final Long bitKey = keyManager.longFromKey(keySet);
|
||||
|
||||
long nObservations = (Long) reportTable.get(i, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
|
||||
long nErrors = (Long) reportTable.get(i, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
|
||||
double empiricalQuality = (Double) reportTable.get(i, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME);
|
||||
final long nObservations = (Long) reportTable.get(i, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
|
||||
final long nErrors = (Long) reportTable.get(i, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
|
||||
final double empiricalQuality = (Double) reportTable.get(i, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME);
|
||||
|
||||
double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table
|
||||
(Double) reportTable.get(i, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
|
||||
Byte.parseByte((String) reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table
|
||||
final double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table
|
||||
(Double) reportTable.get(i, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
|
||||
Byte.parseByte((String) reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table
|
||||
|
||||
RecalDatum recalDatum = new RecalDatum(nObservations, nErrors, estimatedQReported, empiricalQuality);
|
||||
final RecalDatum recalDatum = new RecalDatum(nObservations, nErrors, estimatedQReported, empiricalQuality);
|
||||
result.put(bitKey, recalDatum);
|
||||
}
|
||||
return result;
|
||||
|
|
@ -217,14 +217,14 @@ public class RecalibrationReport {
|
|||
* @return an ArrayList with the quantization mappings from 0 to MAX_QUAL_SCORE
|
||||
*/
|
||||
private QuantizationInfo initializeQuantizationTable(GATKReportTable table) {
|
||||
Byte[] quals = new Byte[QualityUtils.MAX_QUAL_SCORE + 1];
|
||||
Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1];
|
||||
final Byte[] quals = new Byte[QualityUtils.MAX_QUAL_SCORE + 1];
|
||||
final Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1];
|
||||
for ( int i = 0; i < table.getNumRows(); i++ ) {
|
||||
byte originalQual = (byte)i;
|
||||
Object quantizedObject = table.get(i, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME);
|
||||
Object countObject = table.get(i, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
|
||||
byte quantizedQual = Byte.parseByte(quantizedObject.toString());
|
||||
long quantizedCount = Long.parseLong(countObject.toString());
|
||||
final byte originalQual = (byte)i;
|
||||
final Object quantizedObject = table.get(i, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME);
|
||||
final Object countObject = table.get(i, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
|
||||
final byte quantizedQual = Byte.parseByte(quantizedObject.toString());
|
||||
final long quantizedCount = Long.parseLong(countObject.toString());
|
||||
quals[originalQual] = quantizedQual;
|
||||
counts[originalQual] = quantizedCount;
|
||||
}
|
||||
|
|
@ -238,7 +238,7 @@ public class RecalibrationReport {
|
|||
* @return a RAC object properly initialized with all the objects in the table
|
||||
*/
|
||||
private RecalibrationArgumentCollection initializeArgumentCollectionTable(GATKReportTable table) {
|
||||
RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||
final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||
|
||||
for ( int i = 0; i < table.getNumRows(); i++ ) {
|
||||
final String argument = table.get(i, "Argument").toString();
|
||||
|
|
@ -306,7 +306,7 @@ public class RecalibrationReport {
|
|||
* and quantization of the quality scores during every call of combine(). Very useful for the BQSRGatherer.
|
||||
*/
|
||||
public void calculateEmpiricalAndQuantizedQualities() {
|
||||
for (Map<BitSet, RecalDatum> table : keysAndTablesMap.values())
|
||||
for (Map<Long, RecalDatum> table : keysAndTablesMap.values())
|
||||
for (RecalDatum datum : table.values())
|
||||
datum.calcCombinedEmpiricalQuality();
|
||||
|
||||
|
|
@ -331,26 +331,26 @@ public class RecalibrationReport {
|
|||
return isEqualTable(this.keysAndTablesMap, other.keysAndTablesMap);
|
||||
}
|
||||
|
||||
private boolean isEqualTable(LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> t1, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> t2) {
|
||||
private boolean isEqualTable(LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> t1, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> t2) {
|
||||
if (t1.size() != t2.size())
|
||||
return false;
|
||||
|
||||
Iterator<Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>>> t1Iterator = t1.entrySet().iterator();
|
||||
Iterator<Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>>> t2Iterator = t2.entrySet().iterator();
|
||||
final Iterator<Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>>> t1Iterator = t1.entrySet().iterator();
|
||||
final Iterator<Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>>> t2Iterator = t2.entrySet().iterator();
|
||||
|
||||
while (t1Iterator.hasNext() && t2Iterator.hasNext()) {
|
||||
Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> t1MapEntry = t1Iterator.next();
|
||||
Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> t2MapEntry = t2Iterator.next();
|
||||
Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> t1MapEntry = t1Iterator.next();
|
||||
Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> t2MapEntry = t2Iterator.next();
|
||||
|
||||
if (!(t1MapEntry.getKey().equals(t2MapEntry.getKey())))
|
||||
return false;
|
||||
|
||||
Map<BitSet, RecalDatum> table2 = t2MapEntry.getValue();
|
||||
for (Map.Entry<BitSet, RecalDatum> t1TableEntry : t1MapEntry.getValue().entrySet()) {
|
||||
BitSet t1Key = t1TableEntry.getKey();
|
||||
final Map<Long, RecalDatum> table2 = t2MapEntry.getValue();
|
||||
for (Map.Entry<Long, RecalDatum> t1TableEntry : t1MapEntry.getValue().entrySet()) {
|
||||
final Long t1Key = t1TableEntry.getKey();
|
||||
if (!table2.containsKey(t1Key))
|
||||
return false;
|
||||
RecalDatum t1Datum = t1TableEntry.getValue();
|
||||
final RecalDatum t1Datum = t1TableEntry.getValue();
|
||||
if (!t1Datum.equals(table2.get(t1Key)))
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,9 +1,5 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.ObjectOutputStream;
|
||||
import java.util.BitSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
|
@ -16,10 +12,8 @@ import java.util.Map;
|
|||
*/
|
||||
public class BitSetUtils {
|
||||
|
||||
static final private int MAX_DNA_CONTEXT = 31; // the maximum context size (number of bases) permitted in the "long bitset" implementation of the DNA <=> BitSet conversion.
|
||||
static final private byte NBITS_LONG_REPRESENTATION = 64; // the number of bits used in the long version to represent the bit set (necessary for the two's complement representation of negative numbers)
|
||||
static final private byte NBITS_SHORT_REPRESENTATION = 16; // the number of bits used in the short version to represent the bit set (necessary for the two's complement representation of negative numbers)
|
||||
static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1]; // keeps the memoized table with the number of combinations for each given DNA context length
|
||||
|
||||
/**
|
||||
* Creates an long out of a bitset
|
||||
|
|
@ -112,173 +106,4 @@ public class BitSetUtils {
|
|||
}
|
||||
return bitSet;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a BitSet into the dna string representation.
|
||||
*
|
||||
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
|
||||
* be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
|
||||
* a bitSetFrom(BigNumber) method.
|
||||
*
|
||||
* We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the
|
||||
* base_10 representation of the sequence. This is important for us to know how to bring the number
|
||||
* to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented
|
||||
* as 0's and leading 0's are omitted).
|
||||
*
|
||||
* quasi-canonical because A is represented by a 0, therefore,
|
||||
* instead of : 0, 1, 2, 3, 10, 11, 12, ...
|
||||
* we have : 0, 1, 2, 3, 00, 01, 02, ...
|
||||
*
|
||||
* but we can correctly decode it because we know the final length.
|
||||
*
|
||||
* @param bitSet the bitset representation of the dna sequence
|
||||
* @return the dna sequence represented by the bitset
|
||||
*/
|
||||
public static String dnaFrom(final BitSet bitSet) {
|
||||
long number = longFrom(bitSet); // the base_10 representation of the bit set
|
||||
if (number < 0)
|
||||
throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");
|
||||
|
||||
final int length = contextLengthFor(number); // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls)
|
||||
number -= combinationsFor(length - 1); // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
|
||||
|
||||
StringBuilder dna = new StringBuilder();
|
||||
while (number > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical)
|
||||
byte base = (byte) (number % 4);
|
||||
switch (base) {
|
||||
case 0:
|
||||
dna.append('A');
|
||||
break;
|
||||
case 1:
|
||||
dna.append('C');
|
||||
break;
|
||||
case 2:
|
||||
dna.append('G');
|
||||
break;
|
||||
case 3:
|
||||
dna.append('T');
|
||||
break;
|
||||
}
|
||||
number /= 4;
|
||||
}
|
||||
for (int j = dna.length(); j < length; j++)
|
||||
dna.append('A'); // add leading A's as necessary (due to the "quasi" canonical status, see description above)
|
||||
|
||||
return dna.reverse().toString(); // make sure to reverse the string since we should have been pre-pending all along
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a BitSet representation of a given dna string.
|
||||
*
|
||||
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
|
||||
* be longer than 31 bases. To increase this limit, use BigNumbers instead of long and create
|
||||
* a bitSetFrom(BigNumber) method.
|
||||
*
|
||||
* The bit representation of a dna string is the simple:
|
||||
* 0 A 4 AA 8 CA
|
||||
* 1 C 5 AC ...
|
||||
* 2 G 6 AG 1343 TTGGT
|
||||
* 3 T 7 AT 1364 TTTTT
|
||||
*
|
||||
* To convert from dna to number, we convert the dna string to base10 and add all combinations that
|
||||
* preceded the string (with smaller lengths).
|
||||
*
|
||||
* @param dna the dna sequence
|
||||
* @return the bitset representing the dna sequence
|
||||
*/
|
||||
public static BitSet bitSetFrom(String dna) {
|
||||
return bitSetFrom(dna.getBytes());
|
||||
}
|
||||
|
||||
public static BitSet bitSetFrom(final byte[] dna) {
|
||||
if (dna.length > MAX_DNA_CONTEXT)
|
||||
throw new ReviewedStingException(String.format("DNA Length cannot be bigger than %d. dna: %s (%d)", MAX_DNA_CONTEXT, dna, dna.length));
|
||||
|
||||
final long preContext = combinationsFor(dna.length - 1); // the sum of all combinations that preceded the length of the dna string
|
||||
long baseTen = 0; // the number in base_10 that we are going to use to generate the bit set
|
||||
for (final byte base : dna) {
|
||||
baseTen *= 4;
|
||||
baseTen += BaseUtils.simpleBaseToBaseIndex(base);
|
||||
}
|
||||
return bitSetFrom(baseTen + preContext); // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the number of bits necessary to represent a given number of elements
|
||||
*
|
||||
* @param numberOfElements the number of elements to represent (must be positive)
|
||||
* @return the number of bits necessary to represent this many elements
|
||||
*/
|
||||
public static int numberOfBitsToRepresent(long numberOfElements) {
|
||||
if (numberOfElements < 0)
|
||||
throw new ReviewedStingException("Number of elements must be positive: " + numberOfElements);
|
||||
|
||||
if (numberOfElements == 1L)
|
||||
return 1; // special case
|
||||
|
||||
int n = 0;
|
||||
numberOfElements--;
|
||||
while (numberOfElements > 0) {
|
||||
numberOfElements = numberOfElements >> 1;
|
||||
n++;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the length of the DNA context for a given base 10 number
|
||||
*
|
||||
* It is important to know the length given the base 10 number to calculate the number of combinations
|
||||
* and to disambiguate the "quasi-canonical" state.
|
||||
*
|
||||
* This method also calculates the number of combinations as a by-product, but since it memoizes the
|
||||
* results, a subsequent call to combinationsFor(length) is O(1).
|
||||
*
|
||||
* @param number the base 10 representation of the bitset
|
||||
* @return the length of the DNA context represented by this number
|
||||
*/
|
||||
private static int contextLengthFor(long number) {
|
||||
int length = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
|
||||
long combinations = combinationsFor(length); // the next context (we advance it so we know which one was preceding it).
|
||||
while (combinations <= number) { // find the length of the dna string (length)
|
||||
length++;
|
||||
combinations = combinationsFor(length); // calculate the next context
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
/**
|
||||
* The sum of all combinations of a context of a given length from length = 0 to length.
|
||||
*
|
||||
* Memoized implementation of sum(4^i) , where i=[0,length]
|
||||
*
|
||||
* @param length the length of the DNA context
|
||||
* @return the sum of all combinations leading up to this context length.
|
||||
*/
|
||||
private static long combinationsFor(int length) {
|
||||
if (length > MAX_DNA_CONTEXT)
|
||||
throw new ReviewedStingException(String.format("Context cannot be longer than %d bases but requested %d.", MAX_DNA_CONTEXT, length));
|
||||
|
||||
// only calculate the number of combinations if the table hasn't already cached the value
|
||||
if (length > 0 && combinationsPerLength[length] == 0) {
|
||||
long combinations = 0L;
|
||||
for (int i = 1; i <= length; i++)
|
||||
combinations += (1L << 2 * i); // add all combinations with 4^i ( 4^i is the same as 2^(2*i) )
|
||||
combinationsPerLength[length] = combinations;
|
||||
}
|
||||
return combinationsPerLength[length];
|
||||
}
|
||||
|
||||
|
||||
public static byte[] sizeOf(Object obj) throws java.io.IOException
|
||||
{
|
||||
ByteArrayOutputStream byteObject = new ByteArrayOutputStream();
|
||||
ObjectOutputStream objectOutputStream = new ObjectOutputStream(byteObject);
|
||||
objectOutputStream.writeObject(obj);
|
||||
objectOutputStream.flush();
|
||||
objectOutputStream.close();
|
||||
byteObject.close();
|
||||
|
||||
return byteObject.toByteArray();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@
|
|||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
|
||||
import org.broadinstitute.sting.utils.BitSetUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -43,7 +42,7 @@ import java.util.*;
|
|||
|
||||
public class BaseRecalibration {
|
||||
private QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
|
||||
private LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
|
||||
private LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
|
||||
private ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation
|
||||
|
||||
|
||||
|
|
@ -72,7 +71,7 @@ public class BaseRecalibration {
|
|||
* @param keysAndTablesMap the map of key managers and recalibration tables
|
||||
* @param requestedCovariates the list of requested covariates
|
||||
*/
|
||||
protected BaseRecalibration(QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap, ArrayList<Covariate> requestedCovariates) {
|
||||
protected BaseRecalibration(QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap, ArrayList<Covariate> requestedCovariates) {
|
||||
this.quantizationInfo = quantizationInfo;
|
||||
this.keysAndTablesMap = keysAndTablesMap;
|
||||
this.requestedCovariates = requestedCovariates;
|
||||
|
|
@ -94,7 +93,7 @@ public class BaseRecalibration {
|
|||
final byte originalQualityScore = quals[offset];
|
||||
|
||||
if (originalQualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
||||
final BitSet[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model
|
||||
final Long[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model
|
||||
final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
|
||||
quals[offset] = recalibratedQualityScore;
|
||||
}
|
||||
|
|
@ -122,23 +121,23 @@ public class BaseRecalibration {
|
|||
* @param errorModel the event type
|
||||
* @return A recalibrated quality score as a byte
|
||||
*/
|
||||
protected byte performSequentialQualityCalculation(BitSet[] key, EventType errorModel) {
|
||||
protected byte performSequentialQualityCalculation(Long[] key, EventType errorModel) {
|
||||
final String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code";
|
||||
final String TOO_MANY_KEYS_EXCEPTION = "There should only be one key for the RG collapsed table, something went wrong here";
|
||||
|
||||
final byte qualFromRead = (byte) BitSetUtils.shortFrom(key[1]);
|
||||
final byte qualFromRead = (byte)(long)key[1];
|
||||
|
||||
double globalDeltaQ = 0.0;
|
||||
double deltaQReported = 0.0;
|
||||
double deltaQCovariates = 0.0;
|
||||
|
||||
for (Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> mapEntry : keysAndTablesMap.entrySet()) {
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> mapEntry : keysAndTablesMap.entrySet()) {
|
||||
BQSRKeyManager keyManager = mapEntry.getKey();
|
||||
Map<BitSet, RecalDatum> table = mapEntry.getValue();
|
||||
Map<Long, RecalDatum> table = mapEntry.getValue();
|
||||
|
||||
final List<Long> bitKeys = keyManager.longsFromAllKeys(key, errorModel); // calculate the shift in quality due to the read group
|
||||
switch(keyManager.getRequiredCovariates().size()) {
|
||||
case 1: // this is the ReadGroup table
|
||||
List<BitSet> bitKeys = keyManager.bitSetsFromAllKeys(key, errorModel); // calculate the shift in quality due to the read group
|
||||
if (bitKeys.size() > 1)
|
||||
throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION);
|
||||
|
||||
|
|
@ -151,7 +150,6 @@ public class BaseRecalibration {
|
|||
break;
|
||||
case 2:
|
||||
if (keyManager.getOptionalCovariates().isEmpty()) { // this is the QualityScore table
|
||||
bitKeys = keyManager.bitSetsFromAllKeys(key, errorModel); // calculate the shift in quality due to the reported quality score
|
||||
if (bitKeys.size() > 1)
|
||||
throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION);
|
||||
|
||||
|
|
@ -162,8 +160,7 @@ public class BaseRecalibration {
|
|||
}
|
||||
}
|
||||
else { // this is the table with all the covariates
|
||||
bitKeys = keyManager.bitSetsFromAllKeys(key, errorModel); // calculate the shift in quality due to each covariate by itself in turn
|
||||
for (BitSet k : bitKeys) {
|
||||
for (Long k : bitKeys) {
|
||||
final RecalDatum empiricalQualCO = table.get(k);
|
||||
if (empiricalQualCO != null) {
|
||||
double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ public class BQSRKeyManagerUnitTest {
|
|||
}
|
||||
|
||||
private void runTestOnRead(GATKSAMRecord read, List<Covariate> covariateList, int nRequired) {
|
||||
final BitSet[][][] covariateKeys = new BitSet[covariateList.size()][EventType.values().length][];
|
||||
final Long[][][] covariateKeys = new Long[covariateList.size()][EventType.values().length][];
|
||||
int i = 0;
|
||||
for (Covariate cov : covariateList) {
|
||||
cov.initialize(RAC);
|
||||
|
|
@ -103,7 +103,7 @@ public class BQSRKeyManagerUnitTest {
|
|||
|
||||
for (int l = 0; l < read.getReadLength(); l++) {
|
||||
for (int eventType = 0; eventType < EventType.values().length; eventType++) {
|
||||
BitSet[] keySet = new BitSet[covariateList.size()];
|
||||
Long[] keySet = new Long[covariateList.size()];
|
||||
Object[] expectedRequired = new Object[covariateList.size()];
|
||||
Object[] expectedCovariate = new Object[covariateList.size()];
|
||||
|
||||
|
|
@ -111,14 +111,14 @@ public class BQSRKeyManagerUnitTest {
|
|||
keySet[j] = covariateKeys[j][eventType][l];
|
||||
|
||||
if (j < nRequired)
|
||||
expectedRequired[j] = covariateList.get(j).keyFromBitSet(keySet[j]);
|
||||
expectedRequired[j] = covariateList.get(j).formatKey(keySet[j]);
|
||||
else
|
||||
expectedCovariate[j - nRequired] = covariateList.get(j).keyFromBitSet(keySet[j]);
|
||||
expectedCovariate[j - nRequired] = covariateList.get(j).formatKey(keySet[j]);
|
||||
}
|
||||
|
||||
List<BitSet> hashKeys = keyManager.bitSetsFromAllKeys(keySet, EventType.eventFrom(eventType));
|
||||
List<Long> hashKeys = keyManager.longsFromAllKeys(keySet, EventType.eventFrom(eventType));
|
||||
short cov = 0;
|
||||
for (BitSet key : hashKeys) {
|
||||
for (Long key : hashKeys) {
|
||||
Object[] actual = keyManager.keySetFrom(key).toArray();
|
||||
|
||||
// Build the expected array
|
||||
|
|
|
|||
|
|
@ -8,8 +8,6 @@ import org.testng.Assert;
|
|||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* @author Mauricio Carneiro
|
||||
* @since 3/1/12
|
||||
|
|
@ -36,9 +34,9 @@ public class ContextCovariateUnitTest {
|
|||
verifyCovariateArray(values.getDeletions(), RAC.DELETIONS_CONTEXT_SIZE, clippedRead, covariate);
|
||||
}
|
||||
|
||||
public static void verifyCovariateArray(BitSet[] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) {
|
||||
public static void verifyCovariateArray(Long[] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) {
|
||||
for (int i = 0; i < values.length; i++)
|
||||
Assert.assertEquals(contextCovariate.keyFromBitSet(values[i]), expectedContext(read, i, contextSize));
|
||||
Assert.assertEquals(contextCovariate.formatKey(values[i]), expectedContext(read, i, contextSize));
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -48,9 +48,9 @@ public class CycleCovariateUnitTest {
|
|||
verifyCovariateArray(values.getMismatches(), (short) -1, (short) -1);
|
||||
}
|
||||
|
||||
private void verifyCovariateArray(BitSet[] values, short init, short increment) {
|
||||
private void verifyCovariateArray(Long[] values, short init, short increment) {
|
||||
for (short i = 0; i < values.length; i++) {
|
||||
short actual = Short.decode(covariate.keyFromBitSet(values[i]));
|
||||
short actual = Short.decode(covariate.formatKey(values[i]));
|
||||
int expected = init + (increment * i);
|
||||
// System.out.println(String.format("%d: %d, %d", i, actual, expected));
|
||||
Assert.assertEquals(actual, expected);
|
||||
|
|
|
|||
|
|
@ -53,24 +53,24 @@ public class ReadCovariatesUnitTest {
|
|||
|
||||
for (int i = 0; i < length; i++) {
|
||||
// check that read group is always the same
|
||||
Assert.assertEquals(rgCov.keyFromBitSet(rc.getMismatchesKeySet(i)[0]), RGID);
|
||||
Assert.assertEquals(rgCov.keyFromBitSet(rc.getInsertionsKeySet(i)[0]), RGID);
|
||||
Assert.assertEquals(rgCov.keyFromBitSet(rc.getDeletionsKeySet(i)[0]), RGID);
|
||||
Assert.assertEquals(rgCov.formatKey(rc.getMismatchesKeySet(i)[0]), RGID);
|
||||
Assert.assertEquals(rgCov.formatKey(rc.getInsertionsKeySet(i)[0]), RGID);
|
||||
Assert.assertEquals(rgCov.formatKey(rc.getDeletionsKeySet(i)[0]), RGID);
|
||||
|
||||
// check quality score
|
||||
Assert.assertEquals(qsCov.keyFromBitSet(rc.getMismatchesKeySet(i)[1]), "" + mQuals[i]);
|
||||
Assert.assertEquals(qsCov.keyFromBitSet(rc.getInsertionsKeySet(i)[1]), "" + iQuals[i]);
|
||||
Assert.assertEquals(qsCov.keyFromBitSet(rc.getDeletionsKeySet(i)[1]), "" + dQuals[i]);
|
||||
Assert.assertEquals(qsCov.formatKey(rc.getMismatchesKeySet(i)[1]), "" + mQuals[i]);
|
||||
Assert.assertEquals(qsCov.formatKey(rc.getInsertionsKeySet(i)[1]), "" + iQuals[i]);
|
||||
Assert.assertEquals(qsCov.formatKey(rc.getDeletionsKeySet(i)[1]), "" + dQuals[i]);
|
||||
|
||||
// check context
|
||||
Assert.assertEquals(coCov.keyFromBitSet(rc.getMismatchesKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.MISMATCHES_CONTEXT_SIZE));
|
||||
Assert.assertEquals(coCov.keyFromBitSet(rc.getInsertionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INSERTIONS_CONTEXT_SIZE));
|
||||
Assert.assertEquals(coCov.keyFromBitSet(rc.getDeletionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.DELETIONS_CONTEXT_SIZE));
|
||||
Assert.assertEquals(coCov.formatKey(rc.getMismatchesKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.MISMATCHES_CONTEXT_SIZE));
|
||||
Assert.assertEquals(coCov.formatKey(rc.getInsertionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INSERTIONS_CONTEXT_SIZE));
|
||||
Assert.assertEquals(coCov.formatKey(rc.getDeletionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.DELETIONS_CONTEXT_SIZE));
|
||||
|
||||
// check cycle
|
||||
Assert.assertEquals(cyCov.keyFromBitSet(rc.getMismatchesKeySet(i)[3]), "" + (i+1));
|
||||
Assert.assertEquals(cyCov.keyFromBitSet(rc.getInsertionsKeySet(i)[3]), "" + (i+1));
|
||||
Assert.assertEquals(cyCov.keyFromBitSet(rc.getDeletionsKeySet(i)[3]), "" + (i+1));
|
||||
Assert.assertEquals(cyCov.formatKey(rc.getMismatchesKeySet(i)[3]), "" + (i+1));
|
||||
Assert.assertEquals(cyCov.formatKey(rc.getInsertionsKeySet(i)[3]), "" + (i+1));
|
||||
Assert.assertEquals(cyCov.formatKey(rc.getDeletionsKeySet(i)[3]), "" + (i+1));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue