BQSR optimization: getRequiredCovariates() and getOptionalCovariates() were creating a new List every time they were being called, and unfortunately getRequiredCovariates().size() is used as the stop condition in for-loops throughout the code. Just maintaining the original list of covariates results in a 15% reduction in runtime for BQSR.
This commit is contained in:
parent
0fb9179f76
commit
31c3a6be48
|
|
@ -25,8 +25,11 @@ import java.util.*;
|
||||||
* @since 3/6/12
|
* @since 3/6/12
|
||||||
*/
|
*/
|
||||||
public class BQSRKeyManager {
|
public class BQSRKeyManager {
|
||||||
private final List<RequiredCovariateInfo> requiredCovariates;
|
|
||||||
private final List<OptionalCovariateInfo> optionalCovariates;
|
private final List<Covariate> requiredCovariates;
|
||||||
|
private final List<Covariate> optionalCovariates;
|
||||||
|
private final List<RequiredCovariateInfo> requiredCovariatesInfo;
|
||||||
|
private final List<OptionalCovariateInfo> optionalCovariatesInfo;
|
||||||
private final Map<String, Short> covariateNameToIDMap;
|
private final Map<String, Short> covariateNameToIDMap;
|
||||||
|
|
||||||
private int nRequiredBits; // Number of bits used to represent the required covariates
|
private int nRequiredBits; // Number of bits used to represent the required covariates
|
||||||
|
|
@ -44,15 +47,17 @@ public class BQSRKeyManager {
|
||||||
* @param optionalCovariates the ordered list of optional covariates
|
* @param optionalCovariates the ordered list of optional covariates
|
||||||
*/
|
*/
|
||||||
public BQSRKeyManager(List<Covariate> requiredCovariates, List<Covariate> optionalCovariates) {
|
public BQSRKeyManager(List<Covariate> requiredCovariates, List<Covariate> optionalCovariates) {
|
||||||
this.requiredCovariates = new ArrayList<RequiredCovariateInfo>(requiredCovariates.size()); // initialize the required covariates list
|
this.requiredCovariates = new ArrayList<Covariate>(requiredCovariates);
|
||||||
this.optionalCovariates = new ArrayList<OptionalCovariateInfo>(optionalCovariates.size()); // initialize the optional covariates list (size may be 0, it's okay)
|
this.optionalCovariates = new ArrayList<Covariate>(optionalCovariates);
|
||||||
this.covariateNameToIDMap = new HashMap<String, Short>(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates)
|
requiredCovariatesInfo = new ArrayList<RequiredCovariateInfo>(requiredCovariates.size()); // initialize the required covariates list
|
||||||
|
optionalCovariatesInfo = new ArrayList<OptionalCovariateInfo>(optionalCovariates.size()); // initialize the optional covariates list (size may be 0, it's okay)
|
||||||
|
covariateNameToIDMap = new HashMap<String, Short>(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates)
|
||||||
|
|
||||||
nRequiredBits = 0;
|
nRequiredBits = 0;
|
||||||
for (Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management
|
for (Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management
|
||||||
int nBits = required.numberOfBits(); // number of bits used by this covariate
|
int nBits = required.numberOfBits(); // number of bits used by this covariate
|
||||||
BitSet mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
|
BitSet mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
|
||||||
this.requiredCovariates.add(new RequiredCovariateInfo(nRequiredBits, mask, required)); // Create an object for this required covariate
|
requiredCovariatesInfo.add(new RequiredCovariateInfo(nRequiredBits, mask, required)); // Create an object for this required covariate
|
||||||
nRequiredBits += nBits;
|
nRequiredBits += nBits;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -62,9 +67,9 @@ public class BQSRKeyManager {
|
||||||
int nBits = optional.numberOfBits(); // number of bits used by this covariate
|
int nBits = optional.numberOfBits(); // number of bits used by this covariate
|
||||||
nOptionalBits = Math.max(nOptionalBits, nBits); // optional covariates are represented by the number of bits needed by biggest covariate
|
nOptionalBits = Math.max(nOptionalBits, nBits); // optional covariates are represented by the number of bits needed by biggest covariate
|
||||||
BitSet optionalID = BitSetUtils.bitSetFrom(id); // calculate the optional covariate ID for this covariate
|
BitSet optionalID = BitSetUtils.bitSetFrom(id); // calculate the optional covariate ID for this covariate
|
||||||
this.optionalCovariates.add(new OptionalCovariateInfo(optionalID, optional)); // optional covariates have standardized mask and number of bits, so no need to store in the RequiredCovariateInfo object
|
optionalCovariatesInfo.add(new OptionalCovariateInfo(optionalID, optional)); // optional covariates have standardized mask and number of bits, so no need to store in the RequiredCovariateInfo object
|
||||||
String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
|
String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
|
||||||
this.covariateNameToIDMap.put(covariateName, id);
|
covariateNameToIDMap.put(covariateName, id);
|
||||||
id++;
|
id++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -100,10 +105,10 @@ public class BQSRKeyManager {
|
||||||
|
|
||||||
int covariateIndex = 0;
|
int covariateIndex = 0;
|
||||||
BitSet requiredKey = new BitSet(nRequiredBits); // This will be a bitset holding all the required keys, to replicate later on
|
BitSet requiredKey = new BitSet(nRequiredBits); // This will be a bitset holding all the required keys, to replicate later on
|
||||||
for (RequiredCovariateInfo infoRequired : requiredCovariates)
|
for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo)
|
||||||
addBitSetToKeyAtLocation(requiredKey, allKeys[covariateIndex++], infoRequired.bitsBefore); // Add all the required covariates to the key set
|
addBitSetToKeyAtLocation(requiredKey, allKeys[covariateIndex++], infoRequired.bitsBefore); // Add all the required covariates to the key set
|
||||||
|
|
||||||
for (OptionalCovariateInfo infoOptional : optionalCovariates) {
|
for (OptionalCovariateInfo infoOptional : optionalCovariatesInfo) {
|
||||||
BitSet covariateKey = allKeys[covariateIndex++]; // get the bitset from all keys
|
BitSet covariateKey = allKeys[covariateIndex++]; // get the bitset from all keys
|
||||||
if (covariateKey == null)
|
if (covariateKey == null)
|
||||||
continue; // do not add nulls to the final set of keys.
|
continue; // do not add nulls to the final set of keys.
|
||||||
|
|
@ -116,7 +121,7 @@ public class BQSRKeyManager {
|
||||||
allBitSets.add(optionalKey); // add this key to the list of keys
|
allBitSets.add(optionalKey); // add this key to the list of keys
|
||||||
}
|
}
|
||||||
|
|
||||||
if (optionalCovariates.size() == 0) { // special case when we have no optional covariates, add the event type to the required key (our only key)
|
if (optionalCovariatesInfo.size() == 0) { // special case when we have no optional covariates, add the event type to the required key (our only key)
|
||||||
addBitSetToKeyAtLocation(requiredKey, eventBitSet, eventTypeBitIndex); // Add the event type
|
addBitSetToKeyAtLocation(requiredKey, eventBitSet, eventTypeBitIndex); // Add the event type
|
||||||
allBitSets.add(requiredKey); // add this key to the list of keys
|
allBitSets.add(requiredKey); // add this key to the list of keys
|
||||||
}
|
}
|
||||||
|
|
@ -140,16 +145,16 @@ public class BQSRKeyManager {
|
||||||
BitSet bitSetKey = new BitSet(totalNumberOfBits);
|
BitSet bitSetKey = new BitSet(totalNumberOfBits);
|
||||||
|
|
||||||
int requiredCovariate = 0;
|
int requiredCovariate = 0;
|
||||||
for (RequiredCovariateInfo infoRequired : requiredCovariates) {
|
for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo) {
|
||||||
BitSet covariateBitSet = infoRequired.covariate.bitSetFromKey(key[requiredCovariate++]); // create a bitset from the object key provided using the required covariate's interface
|
BitSet covariateBitSet = infoRequired.covariate.bitSetFromKey(key[requiredCovariate++]); // create a bitset from the object key provided using the required covariate's interface
|
||||||
addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, infoRequired.bitsBefore); // add it to the bitset key
|
addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, infoRequired.bitsBefore); // add it to the bitset key
|
||||||
}
|
}
|
||||||
|
|
||||||
if (optionalCovariates.size() > 0) {
|
if (optionalCovariatesInfo.size() > 0) {
|
||||||
int optionalCovariate = requiredCovariates.size(); // the optional covariate index in the key array
|
int optionalCovariate = requiredCovariatesInfo.size(); // the optional covariate index in the key array
|
||||||
int covariateIDIndex = optionalCovariate + 1; // the optional covariate ID index is right after the optional covariate's
|
int covariateIDIndex = optionalCovariate + 1; // the optional covariate ID index is right after the optional covariate's
|
||||||
int covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index
|
int covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index
|
||||||
OptionalCovariateInfo infoOptional = optionalCovariates.get(covariateID); // so we can get the optional covariate information
|
OptionalCovariateInfo infoOptional = optionalCovariatesInfo.get(covariateID); // so we can get the optional covariate information
|
||||||
|
|
||||||
BitSet covariateBitSet = infoOptional.covariate.bitSetFromKey(key[optionalCovariate]); // convert the optional covariate key into a bitset using the covariate's interface
|
BitSet covariateBitSet = infoOptional.covariate.bitSetFromKey(key[optionalCovariate]); // convert the optional covariate key into a bitset using the covariate's interface
|
||||||
addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, nRequiredBits); // add the optional covariate right after the required covariates
|
addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, nRequiredBits); // add the optional covariate right after the required covariates
|
||||||
|
|
@ -185,16 +190,16 @@ public class BQSRKeyManager {
|
||||||
*/
|
*/
|
||||||
public List<Object> keySetFrom(BitSet key) {
|
public List<Object> keySetFrom(BitSet key) {
|
||||||
List<Object> objectKeys = new ArrayList<Object>();
|
List<Object> objectKeys = new ArrayList<Object>();
|
||||||
for (RequiredCovariateInfo info : requiredCovariates) {
|
for (RequiredCovariateInfo info : requiredCovariatesInfo) {
|
||||||
BitSet covariateBitSet = extractBitSetFromKey(key, info.mask, info.bitsBefore); // get the covariate's bitset
|
BitSet covariateBitSet = extractBitSetFromKey(key, info.mask, info.bitsBefore); // get the covariate's bitset
|
||||||
objectKeys.add(info.covariate.keyFromBitSet(covariateBitSet)); // convert the bitset to object using covariate's interface
|
objectKeys.add(info.covariate.keyFromBitSet(covariateBitSet)); // convert the bitset to object using covariate's interface
|
||||||
}
|
}
|
||||||
|
|
||||||
if (optionalCovariates.size() > 0) {
|
if (optionalCovariatesInfo.size() > 0) {
|
||||||
BitSet covBitSet = extractBitSetFromKey(key, optionalCovariateMask, nRequiredBits); // mask out the covariate bit set
|
BitSet covBitSet = extractBitSetFromKey(key, optionalCovariateMask, nRequiredBits); // mask out the covariate bit set
|
||||||
BitSet idbs = extractBitSetFromKey(key, optionalCovariateIDMask, nRequiredBits + nOptionalBits); // mask out the covariate order (to identify which covariate this is)
|
BitSet idbs = extractBitSetFromKey(key, optionalCovariateIDMask, nRequiredBits + nOptionalBits); // mask out the covariate order (to identify which covariate this is)
|
||||||
short id = BitSetUtils.shortFrom(idbs); // covert the id bitset into a short
|
short id = BitSetUtils.shortFrom(idbs); // covert the id bitset into a short
|
||||||
Covariate covariate = optionalCovariates.get(id).covariate; // get the corresponding optional covariate object
|
Covariate covariate = optionalCovariatesInfo.get(id).covariate; // get the corresponding optional covariate object
|
||||||
objectKeys.add(covariate.keyFromBitSet(covBitSet)); // add the optional covariate to the key set
|
objectKeys.add(covariate.keyFromBitSet(covBitSet)); // add the optional covariate to the key set
|
||||||
objectKeys.add(covariate.getClass().getSimpleName().split("Covariate")[0]); // add the covariate name using the id
|
objectKeys.add(covariate.getClass().getSimpleName().split("Covariate")[0]); // add the covariate name using the id
|
||||||
}
|
}
|
||||||
|
|
@ -203,18 +208,17 @@ public class BQSRKeyManager {
|
||||||
return objectKeys;
|
return objectKeys;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Translates a masked bitset into a bitset starting at 0
|
||||||
|
*
|
||||||
|
* @return a list of the optional covariates
|
||||||
|
*/
|
||||||
public List<Covariate> getRequiredCovariates() {
|
public List<Covariate> getRequiredCovariates() {
|
||||||
ArrayList<Covariate> list = new ArrayList<Covariate>(requiredCovariates.size());
|
return requiredCovariates;
|
||||||
for (RequiredCovariateInfo info : requiredCovariates)
|
|
||||||
list.add(info.covariate);
|
|
||||||
return list;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Covariate> getOptionalCovariates() {
|
public List<Covariate> getOptionalCovariates() {
|
||||||
ArrayList<Covariate> list = new ArrayList<Covariate>(optionalCovariates.size());
|
return optionalCovariates;
|
||||||
for (OptionalCovariateInfo info : optionalCovariates)
|
|
||||||
list.add(info.covariate);
|
|
||||||
return list;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -287,24 +291,24 @@ public class BQSRKeyManager {
|
||||||
if (this == other)
|
if (this == other)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (requiredCovariates.size() != other.requiredCovariates.size() || optionalCovariates.size() != other.optionalCovariates.size())
|
if (requiredCovariatesInfo.size() != other.requiredCovariatesInfo.size() ||
|
||||||
|
optionalCovariatesInfo.size() != other.optionalCovariatesInfo.size())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
Iterator<RequiredCovariateInfo> otherRequiredIterator = other.requiredCovariates.iterator();
|
for (int i = 0; i < requiredCovariates.size(); i++) {
|
||||||
for (RequiredCovariateInfo thisInfo: requiredCovariates) {
|
Covariate myRequiredCovariate = requiredCovariates.get(i);
|
||||||
RequiredCovariateInfo otherInfo = otherRequiredIterator.next();
|
Covariate otherRequiredCovariate = other.requiredCovariates.get(i);
|
||||||
|
String thisName = myRequiredCovariate.getClass().getSimpleName();
|
||||||
String thisName = thisInfo.covariate.getClass().getSimpleName();
|
String otherName = otherRequiredCovariate.getClass().getSimpleName();
|
||||||
String otherName = otherInfo.covariate.getClass().getSimpleName();
|
|
||||||
if (!thisName.equals(otherName))
|
if (!thisName.equals(otherName))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
Iterator<OptionalCovariateInfo> otherOptionalIterator = other.optionalCovariates.iterator();
|
for (int i = 0; i < optionalCovariates.size(); i++) {
|
||||||
for (OptionalCovariateInfo thisInfo : optionalCovariates) {
|
Covariate myOptionalCovariate = optionalCovariates.get(i);
|
||||||
OptionalCovariateInfo otherInfo = otherOptionalIterator.next();
|
Covariate otherOptionalCovariate = other.optionalCovariates.get(i);
|
||||||
String thisName = thisInfo.covariate.getClass().getSimpleName();
|
String thisName = myOptionalCovariate.getClass().getSimpleName();
|
||||||
String otherName = otherInfo.covariate.getClass().getSimpleName();
|
String otherName = otherOptionalCovariate.getClass().getSimpleName();
|
||||||
if (!thisName.equals(otherName))
|
if (!thisName.equals(otherName))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue