BQSR optimization: getRequiredCovariates() and getOptionalCovariates() were creating a new List every time they were called, and unfortunately getRequiredCovariates().size() is used as the stop condition in for-loops throughout the code, so a fresh list was being built on every loop-condition check. Keeping the original lists of covariates as fields and returning them directly results in a 15% reduction in runtime for BQSR.
This commit is contained in:
parent
0fb9179f76
commit
31c3a6be48
|
|
@ -25,8 +25,11 @@ import java.util.*;
|
|||
* @since 3/6/12
|
||||
*/
|
||||
public class BQSRKeyManager {
|
||||
private final List<RequiredCovariateInfo> requiredCovariates;
|
||||
private final List<OptionalCovariateInfo> optionalCovariates;
|
||||
|
||||
private final List<Covariate> requiredCovariates;
|
||||
private final List<Covariate> optionalCovariates;
|
||||
private final List<RequiredCovariateInfo> requiredCovariatesInfo;
|
||||
private final List<OptionalCovariateInfo> optionalCovariatesInfo;
|
||||
private final Map<String, Short> covariateNameToIDMap;
|
||||
|
||||
private int nRequiredBits; // Number of bits used to represent the required covariates
|
||||
|
|
@ -44,15 +47,17 @@ public class BQSRKeyManager {
|
|||
* @param optionalCovariates the ordered list of optional covariates
|
||||
*/
|
||||
public BQSRKeyManager(List<Covariate> requiredCovariates, List<Covariate> optionalCovariates) {
|
||||
this.requiredCovariates = new ArrayList<RequiredCovariateInfo>(requiredCovariates.size()); // initialize the required covariates list
|
||||
this.optionalCovariates = new ArrayList<OptionalCovariateInfo>(optionalCovariates.size()); // initialize the optional covariates list (size may be 0, it's okay)
|
||||
this.covariateNameToIDMap = new HashMap<String, Short>(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates)
|
||||
this.requiredCovariates = new ArrayList<Covariate>(requiredCovariates);
|
||||
this.optionalCovariates = new ArrayList<Covariate>(optionalCovariates);
|
||||
requiredCovariatesInfo = new ArrayList<RequiredCovariateInfo>(requiredCovariates.size()); // initialize the required covariates list
|
||||
optionalCovariatesInfo = new ArrayList<OptionalCovariateInfo>(optionalCovariates.size()); // initialize the optional covariates list (size may be 0, it's okay)
|
||||
covariateNameToIDMap = new HashMap<String, Short>(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates)
|
||||
|
||||
nRequiredBits = 0;
|
||||
for (Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management
|
||||
int nBits = required.numberOfBits(); // number of bits used by this covariate
|
||||
BitSet mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
|
||||
this.requiredCovariates.add(new RequiredCovariateInfo(nRequiredBits, mask, required)); // Create an object for this required covariate
|
||||
requiredCovariatesInfo.add(new RequiredCovariateInfo(nRequiredBits, mask, required)); // Create an object for this required covariate
|
||||
nRequiredBits += nBits;
|
||||
}
|
||||
|
||||
|
|
@ -62,9 +67,9 @@ public class BQSRKeyManager {
|
|||
int nBits = optional.numberOfBits(); // number of bits used by this covariate
|
||||
nOptionalBits = Math.max(nOptionalBits, nBits); // optional covariates are represented by the number of bits needed by biggest covariate
|
||||
BitSet optionalID = BitSetUtils.bitSetFrom(id); // calculate the optional covariate ID for this covariate
|
||||
this.optionalCovariates.add(new OptionalCovariateInfo(optionalID, optional)); // optional covariates have standardized mask and number of bits, so no need to store in the RequiredCovariateInfo object
|
||||
optionalCovariatesInfo.add(new OptionalCovariateInfo(optionalID, optional)); // optional covariates have standardized mask and number of bits, so no need to store in the RequiredCovariateInfo object
|
||||
String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
|
||||
this.covariateNameToIDMap.put(covariateName, id);
|
||||
covariateNameToIDMap.put(covariateName, id);
|
||||
id++;
|
||||
}
|
||||
|
||||
|
|
@ -100,10 +105,10 @@ public class BQSRKeyManager {
|
|||
|
||||
int covariateIndex = 0;
|
||||
BitSet requiredKey = new BitSet(nRequiredBits); // This will be a bitset holding all the required keys, to replicate later on
|
||||
for (RequiredCovariateInfo infoRequired : requiredCovariates)
|
||||
for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo)
|
||||
addBitSetToKeyAtLocation(requiredKey, allKeys[covariateIndex++], infoRequired.bitsBefore); // Add all the required covariates to the key set
|
||||
|
||||
for (OptionalCovariateInfo infoOptional : optionalCovariates) {
|
||||
for (OptionalCovariateInfo infoOptional : optionalCovariatesInfo) {
|
||||
BitSet covariateKey = allKeys[covariateIndex++]; // get the bitset from all keys
|
||||
if (covariateKey == null)
|
||||
continue; // do not add nulls to the final set of keys.
|
||||
|
|
@ -116,7 +121,7 @@ public class BQSRKeyManager {
|
|||
allBitSets.add(optionalKey); // add this key to the list of keys
|
||||
}
|
||||
|
||||
if (optionalCovariates.size() == 0) { // special case when we have no optional covariates, add the event type to the required key (our only key)
|
||||
if (optionalCovariatesInfo.size() == 0) { // special case when we have no optional covariates, add the event type to the required key (our only key)
|
||||
addBitSetToKeyAtLocation(requiredKey, eventBitSet, eventTypeBitIndex); // Add the event type
|
||||
allBitSets.add(requiredKey); // add this key to the list of keys
|
||||
}
|
||||
|
|
@ -140,16 +145,16 @@ public class BQSRKeyManager {
|
|||
BitSet bitSetKey = new BitSet(totalNumberOfBits);
|
||||
|
||||
int requiredCovariate = 0;
|
||||
for (RequiredCovariateInfo infoRequired : requiredCovariates) {
|
||||
for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo) {
|
||||
BitSet covariateBitSet = infoRequired.covariate.bitSetFromKey(key[requiredCovariate++]); // create a bitset from the object key provided using the required covariate's interface
|
||||
addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, infoRequired.bitsBefore); // add it to the bitset key
|
||||
}
|
||||
|
||||
if (optionalCovariates.size() > 0) {
|
||||
int optionalCovariate = requiredCovariates.size(); // the optional covariate index in the key array
|
||||
if (optionalCovariatesInfo.size() > 0) {
|
||||
int optionalCovariate = requiredCovariatesInfo.size(); // the optional covariate index in the key array
|
||||
int covariateIDIndex = optionalCovariate + 1; // the optional covariate ID index is right after the optional covariate's
|
||||
int covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index
|
||||
OptionalCovariateInfo infoOptional = optionalCovariates.get(covariateID); // so we can get the optional covariate information
|
||||
OptionalCovariateInfo infoOptional = optionalCovariatesInfo.get(covariateID); // so we can get the optional covariate information
|
||||
|
||||
BitSet covariateBitSet = infoOptional.covariate.bitSetFromKey(key[optionalCovariate]); // convert the optional covariate key into a bitset using the covariate's interface
|
||||
addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, nRequiredBits); // add the optional covariate right after the required covariates
|
||||
|
|
@ -185,16 +190,16 @@ public class BQSRKeyManager {
|
|||
*/
|
||||
public List<Object> keySetFrom(BitSet key) {
|
||||
List<Object> objectKeys = new ArrayList<Object>();
|
||||
for (RequiredCovariateInfo info : requiredCovariates) {
|
||||
for (RequiredCovariateInfo info : requiredCovariatesInfo) {
|
||||
BitSet covariateBitSet = extractBitSetFromKey(key, info.mask, info.bitsBefore); // get the covariate's bitset
|
||||
objectKeys.add(info.covariate.keyFromBitSet(covariateBitSet)); // convert the bitset to object using covariate's interface
|
||||
}
|
||||
|
||||
if (optionalCovariates.size() > 0) {
|
||||
if (optionalCovariatesInfo.size() > 0) {
|
||||
BitSet covBitSet = extractBitSetFromKey(key, optionalCovariateMask, nRequiredBits); // mask out the covariate bit set
|
||||
BitSet idbs = extractBitSetFromKey(key, optionalCovariateIDMask, nRequiredBits + nOptionalBits); // mask out the covariate order (to identify which covariate this is)
|
||||
short id = BitSetUtils.shortFrom(idbs); // convert the id bitset into a short
|
||||
Covariate covariate = optionalCovariates.get(id).covariate; // get the corresponding optional covariate object
|
||||
Covariate covariate = optionalCovariatesInfo.get(id).covariate; // get the corresponding optional covariate object
|
||||
objectKeys.add(covariate.keyFromBitSet(covBitSet)); // add the optional covariate to the key set
|
||||
objectKeys.add(covariate.getClass().getSimpleName().split("Covariate")[0]); // add the covariate name using the id
|
||||
}
|
||||
|
|
@ -203,18 +208,17 @@ public class BQSRKeyManager {
|
|||
return objectKeys;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the required covariates tracked by this key manager
|
||||
*
|
||||
* @return a list of the required covariates
|
||||
*/
|
||||
public List<Covariate> getRequiredCovariates() {
|
||||
ArrayList<Covariate> list = new ArrayList<Covariate>(requiredCovariates.size());
|
||||
for (RequiredCovariateInfo info : requiredCovariates)
|
||||
list.add(info.covariate);
|
||||
return list;
|
||||
return requiredCovariates;
|
||||
}
|
||||
|
||||
public List<Covariate> getOptionalCovariates() {
|
||||
ArrayList<Covariate> list = new ArrayList<Covariate>(optionalCovariates.size());
|
||||
for (OptionalCovariateInfo info : optionalCovariates)
|
||||
list.add(info.covariate);
|
||||
return list;
|
||||
return optionalCovariates;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -287,24 +291,24 @@ public class BQSRKeyManager {
|
|||
if (this == other)
|
||||
return true;
|
||||
|
||||
if (requiredCovariates.size() != other.requiredCovariates.size() || optionalCovariates.size() != other.optionalCovariates.size())
|
||||
if (requiredCovariatesInfo.size() != other.requiredCovariatesInfo.size() ||
|
||||
optionalCovariatesInfo.size() != other.optionalCovariatesInfo.size())
|
||||
return false;
|
||||
|
||||
Iterator<RequiredCovariateInfo> otherRequiredIterator = other.requiredCovariates.iterator();
|
||||
for (RequiredCovariateInfo thisInfo: requiredCovariates) {
|
||||
RequiredCovariateInfo otherInfo = otherRequiredIterator.next();
|
||||
|
||||
String thisName = thisInfo.covariate.getClass().getSimpleName();
|
||||
String otherName = otherInfo.covariate.getClass().getSimpleName();
|
||||
for (int i = 0; i < requiredCovariates.size(); i++) {
|
||||
Covariate myRequiredCovariate = requiredCovariates.get(i);
|
||||
Covariate otherRequiredCovariate = other.requiredCovariates.get(i);
|
||||
String thisName = myRequiredCovariate.getClass().getSimpleName();
|
||||
String otherName = otherRequiredCovariate.getClass().getSimpleName();
|
||||
if (!thisName.equals(otherName))
|
||||
return false;
|
||||
}
|
||||
|
||||
Iterator<OptionalCovariateInfo> otherOptionalIterator = other.optionalCovariates.iterator();
|
||||
for (OptionalCovariateInfo thisInfo : optionalCovariates) {
|
||||
OptionalCovariateInfo otherInfo = otherOptionalIterator.next();
|
||||
String thisName = thisInfo.covariate.getClass().getSimpleName();
|
||||
String otherName = otherInfo.covariate.getClass().getSimpleName();
|
||||
for (int i = 0; i < optionalCovariates.size(); i++) {
|
||||
Covariate myOptionalCovariate = optionalCovariates.get(i);
|
||||
Covariate otherOptionalCovariate = other.optionalCovariates.get(i);
|
||||
String thisName = myOptionalCovariate.getClass().getSimpleName();
|
||||
String otherName = otherOptionalCovariate.getClass().getSimpleName();
|
||||
if (!thisName.equals(otherName))
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue