Wow, apparently it's way, way less efficient to iterate over Java Lists than native arrays. With this change and the bit fiddling, Ryan's 10-day test case now runs in 1 day. More to come.

This commit is contained in:
Eric Banks 2012-06-12 13:32:56 -04:00
parent a96c5da884
commit 1da3e43679
4 changed files with 62 additions and 56 deletions

View File

@ -27,10 +27,10 @@ import java.util.*;
*/
public class BQSRKeyManager {
private final List<Covariate> requiredCovariates;
private final List<Covariate> optionalCovariates;
private final List<RequiredCovariateInfo> requiredCovariatesInfo;
private final List<OptionalCovariateInfo> optionalCovariatesInfo;
private final Covariate[] requiredCovariates;
private final Covariate[] optionalCovariates;
private final RequiredCovariateInfo[] requiredCovariatesInfo;
private final OptionalCovariateInfo[] optionalCovariatesInfo;
private final Map<String, Short> covariateNameToIDMap;
private int nRequiredBits; // Number of bits used to represent the required covariates
@ -49,17 +49,19 @@ public class BQSRKeyManager {
* @param optionalCovariates the ordered list of optional covariates
*/
public BQSRKeyManager(final List<Covariate> requiredCovariates, final List<Covariate> optionalCovariates) {
this.requiredCovariates = new ArrayList<Covariate>(requiredCovariates);
this.optionalCovariates = new ArrayList<Covariate>(optionalCovariates);
requiredCovariatesInfo = new ArrayList<RequiredCovariateInfo>(requiredCovariates.size()); // initialize the required covariates list
optionalCovariatesInfo = new ArrayList<OptionalCovariateInfo>(optionalCovariates.size()); // initialize the optional covariates list (size may be 0, it's okay)
this.requiredCovariates = new Covariate[requiredCovariates.size()];
this.optionalCovariates = new Covariate[optionalCovariates.size()];
requiredCovariatesInfo = new RequiredCovariateInfo[requiredCovariates.size()]; // initialize the required covariates list
optionalCovariatesInfo = new OptionalCovariateInfo[optionalCovariates.size()]; // initialize the optional covariates list (size may be 0, it's okay)
covariateNameToIDMap = new HashMap<String, Short>(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates)
nRequiredBits = 0;
for (final Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management
for (int i = 0; i < requiredCovariates.size(); i++) { // create a list of required covariates with the extra information for key management
final Covariate required = requiredCovariates.get(i);
final int nBits = required.numberOfBits(); // number of bits used by this covariate
final long mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
requiredCovariatesInfo.add(new RequiredCovariateInfo(nBits, nRequiredBits, mask, required)); // Create an object for this required covariate
this.requiredCovariates[i] = required;
requiredCovariatesInfo[i] = new RequiredCovariateInfo(nBits, nRequiredBits, mask, required); // Create an object for this required covariate
nRequiredBits += nBits;
}
@ -68,9 +70,11 @@ public class BQSRKeyManager {
short id = 0;
int nOptionalBits = 0;
for (final Covariate optional : optionalCovariates) {
for (int i = 0; i < optionalCovariates.size(); i++) {
final Covariate optional = optionalCovariates.get(i);
nOptionalBits = Math.max(nOptionalBits, optional.numberOfBits()); // optional covariates are represented by the number of bits needed by biggest covariate
optionalCovariatesInfo.add(new OptionalCovariateInfo(id, optional));
this.optionalCovariates[i] = optional;
optionalCovariatesInfo[i] = new OptionalCovariateInfo(id, optional);
final String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
covariateNameToIDMap.put(covariateName, id);
id++;
@ -105,7 +109,7 @@ public class BQSRKeyManager {
* @param eventType The type of event described by this keyset (e.g. mismatches, insertions, deletions)
* @return one key in long representation per covariate
*/
public List<Long> longsFromAllKeys(Long[] allKeys, EventType eventType) {
public List<Long> longsFromAllKeys(final Long[] allKeys, final EventType eventType) {
final List<Long> allFinalKeys = new ArrayList<Long>(); // Generate one key per optional covariate
int covariateIndex = 0;
@ -113,7 +117,7 @@ public class BQSRKeyManager {
for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo)
masterKey |= (allKeys[covariateIndex++] << infoRequired.offset);
final long eventKey = keyFromEvent(eventType); // create a key for the event type
final long eventKey = keyFromEvent(eventType); // create a key for the event type
masterKey |= (eventKey << nRequiredBits);
for (OptionalCovariateInfo infoOptional : optionalCovariatesInfo) {
@ -124,13 +128,10 @@ public class BQSRKeyManager {
long newKey = masterKey | (covariateKey << optionalCovariateOffset);
newKey |= (infoOptional.covariateID << optionalCovariateIDOffset);
if ( newKey < 0 )
System.out.println("*** " + newKey);
allFinalKeys.add(newKey); // add this key to the list of keys
}
if (optionalCovariatesInfo.size() == 0) // special case when we have no optional covariates
if (optionalCovariatesInfo.length == 0) // special case when we have no optional covariates
allFinalKeys.add(masterKey);
return allFinalKeys;
@ -158,20 +159,17 @@ public class BQSRKeyManager {
final long eventKey = keyFromEvent((EventType) key[eventIndex]); // create a key for the event type
masterKey |= (eventKey << nRequiredBits);
if (optionalCovariatesInfo.size() > 0) {
final int covariateIndex = requiredCovariatesInfo.size(); // the optional covariate index in the key array
if (optionalCovariatesInfo.length > 0) {
final int covariateIndex = requiredCovariatesInfo.length; // the optional covariate index in the key array
final int covariateIDIndex = covariateIndex + 1; // the optional covariate ID index is right after the optional covariate's
final short covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index
final OptionalCovariateInfo infoOptional = optionalCovariatesInfo.get(covariateID); // so we can get the optional covariate information
final OptionalCovariateInfo infoOptional = optionalCovariatesInfo[covariateID]; // so we can get the optional covariate information
final long covariateKey = infoOptional.covariate.longFromKey(key[covariateIndex]); // convert the optional covariate key into a bitset using the covariate's interface
masterKey |= (covariateKey << optionalCovariateOffset);
masterKey |= (infoOptional.covariateID << optionalCovariateIDOffset);
}
if ( masterKey < 0 )
System.out.println("*** " + masterKey);
return masterKey;
}
@ -201,10 +199,10 @@ public class BQSRKeyManager {
objectKeys.add(info.covariate.formatKey(covariateKey)); // convert the key to object using covariate's interface
}
if (optionalCovariatesInfo.size() > 0) {
if (optionalCovariatesInfo.length > 0) {
final Long covKey = extractKeyFromMaster(master, optionalCovariateMask, optionalCovariateOffset); // get the covariate's key
final int covIDKey = (int)extractKeyFromMaster(master, optionalCovariateIDMask, optionalCovariateIDOffset); // get the covariate's id (to identify which covariate this is)
Covariate covariate = optionalCovariatesInfo.get((short)covIDKey).covariate; // get the corresponding optional covariate object
Covariate covariate = optionalCovariatesInfo[(short)covIDKey].covariate; // get the corresponding optional covariate object
objectKeys.add(covariate.formatKey(covKey)); // add the optional covariate key to the key set
objectKeys.add(covariate.getClass().getSimpleName().split("Covariate")[0]); // add the covariate name using the id
}
@ -214,14 +212,22 @@ public class BQSRKeyManager {
return objectKeys;
}
public List<Covariate> getRequiredCovariates() {
public Covariate[] getRequiredCovariates() {
// Hands back the internal array itself (no copy is made), so any write by a caller is visible to this key manager.
return requiredCovariates;
}
public List<Covariate> getOptionalCovariates() {
public Covariate[] getOptionalCovariates() {
// Hands back the internal array itself (no copy is made), so any write by a caller is visible to this key manager.
return optionalCovariates;
}
/**
* @return the number of required covariates held by this key manager (the length of the required covariates array)
*/
public int getNumRequiredCovariates() {
return requiredCovariates.length;
}
/**
* @return the number of optional covariates held by this key manager (the length of the optional covariates array; may be 0)
*/
public int getNumOptionalCovariates() {
return optionalCovariates.length;
}
/**
* Creates a mask for the requested covariate to extract the relevant key from a combined master key
*
@ -261,22 +267,22 @@ public class BQSRKeyManager {
if (this == other)
return true;
if (requiredCovariatesInfo.size() != other.requiredCovariatesInfo.size() ||
optionalCovariatesInfo.size() != other.optionalCovariatesInfo.size())
if (requiredCovariatesInfo.length != other.requiredCovariatesInfo.length ||
optionalCovariatesInfo.length != other.optionalCovariatesInfo.length)
return false;
for (int i = 0; i < requiredCovariates.size(); i++) {
Covariate myRequiredCovariate = requiredCovariates.get(i);
Covariate otherRequiredCovariate = other.requiredCovariates.get(i);
for (int i = 0; i < requiredCovariates.length; i++) {
Covariate myRequiredCovariate = requiredCovariates[i];
Covariate otherRequiredCovariate = other.requiredCovariates[i];
String thisName = myRequiredCovariate.getClass().getSimpleName();
String otherName = otherRequiredCovariate.getClass().getSimpleName();
if (!thisName.equals(otherName))
return false;
}
for (int i = 0; i < optionalCovariates.size(); i++) {
Covariate myOptionalCovariate = optionalCovariates.get(i);
Covariate otherOptionalCovariate = other.optionalCovariates.get(i);
for (int i = 0; i < optionalCovariates.length; i++) {
Covariate myOptionalCovariate = optionalCovariates[i];
Covariate otherOptionalCovariate = other.optionalCovariates[i];
String thisName = myOptionalCovariate.getClass().getSimpleName();
String otherName = otherOptionalCovariate.getClass().getSimpleName();
if (!thisName.equals(otherName))

View File

@ -38,7 +38,7 @@ public class QuantizationInfo {
Map<Long, RecalDatum> qualTable = null; // look for the quality score table
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
BQSRKeyManager keyManager = entry.getKey();
if (keyManager.getRequiredCovariates().size() == 2) // it should be the only one with 2 required covaraites
if (keyManager.getNumRequiredCovariates() == 2) // it should be the only one with 2 required covariates
qualTable = entry.getValue();
}

View File

@ -241,8 +241,8 @@ public class RecalDataManager {
final boolean isReadGroupTable = tableIndex == 0; // special case for the read group table so we can print the extra column it needs.
final List<Covariate> requiredList = keyManager.getRequiredCovariates(); // ask the key manager what required covariates were used in this recal table
final List<Covariate> optionalList = keyManager.getOptionalCovariates(); // ask the key manager what optional covariates were used in this recal table
final Covariate[] requiredList = keyManager.getRequiredCovariates(); // ask the key manager what required covariates were used in this recal table
final Covariate[] optionalList = keyManager.getOptionalCovariates(); // ask the key manager what optional covariates were used in this recal table
final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
@ -251,7 +251,7 @@ public class RecalDataManager {
columnNames.add(new Pair<String,String>(name, "%s")); // save the required covariate name so we can reference it in the future
}
if (optionalList.size() > 0) {
if (optionalList.length > 0) {
columnNames.add(covariateValue);
columnNames.add(covariateName);
}
@ -362,12 +362,12 @@ public class RecalDataManager {
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> tableEntry : map.entrySet()) {
final BQSRKeyManager keyManager = tableEntry.getKey();
if (keyManager.getOptionalCovariates().size() > 0) { // initialize with the 'all covariates' table
if (keyManager.getNumOptionalCovariates() > 0) { // initialize with the 'all covariates' table
// create a key manager for the delta table
final List<Covariate> requiredCovariates = Arrays.asList(keyManager.getRequiredCovariates().get(0)); // include the read group covariate as the only required covariate
final List<Covariate> requiredCovariates = Arrays.asList(keyManager.getRequiredCovariates()[0]); // include the read group covariate as the only required covariate
final List<Covariate> optionalCovariates = new ArrayList<Covariate>();
optionalCovariates.add(keyManager.getRequiredCovariates().get(1)); // include the quality score covariate as an optional covariate
optionalCovariates.addAll(keyManager.getOptionalCovariates()); // include all optional covariates
optionalCovariates.add(keyManager.getRequiredCovariates()[1]); // include the quality score covariate as an optional covariate
optionalCovariates.addAll(Arrays.asList(keyManager.getOptionalCovariates())); // include all optional covariates
deltaKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initialize the key manager
}
}
@ -379,7 +379,7 @@ public class RecalDataManager {
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> tableEntry : map.entrySet()) {
final BQSRKeyManager keyManager = tableEntry.getKey();
if (keyManager.getRequiredCovariates().size() == 2 && keyManager.getOptionalCovariates().isEmpty()) { // look for the QualityScore table
if (keyManager.getNumRequiredCovariates() == 2 && keyManager.getNumOptionalCovariates() == 0) { // look for the QualityScore table
final Map<Long, RecalDatum> table = tableEntry.getValue();
// add the quality score table to the delta table
@ -397,7 +397,7 @@ public class RecalDataManager {
}
}
else if (keyManager.getOptionalCovariates().size() > 0) { // look for the optional covariates table
else if (keyManager.getNumOptionalCovariates() > 0) { // look for the optional covariates table
final Map<Long, RecalDatum> table = tableEntry.getValue();
// add the optional covariates to the delta table

View File

@ -41,11 +41,13 @@ import java.util.*;
*/
public class BaseRecalibration {
private final static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code";
private final static String TOO_MANY_KEYS_EXCEPTION = "There should only be one key for the RG collapsed table, something went wrong here";
private QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
private LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
private ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation
/**
* Constructor using a GATK Report file
*
@ -121,9 +123,7 @@ public class BaseRecalibration {
* @param errorModel the event type
* @return A recalibrated quality score as a byte
*/
protected byte performSequentialQualityCalculation(Long[] key, EventType errorModel) {
final String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code";
final String TOO_MANY_KEYS_EXCEPTION = "There should only be one key for the RG collapsed table, something went wrong here";
protected byte performSequentialQualityCalculation(final Long[] key, final EventType errorModel) {
final byte qualFromRead = (byte)(long)key[1];
@ -132,11 +132,11 @@ public class BaseRecalibration {
double deltaQCovariates = 0.0;
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> mapEntry : keysAndTablesMap.entrySet()) {
BQSRKeyManager keyManager = mapEntry.getKey();
Map<Long, RecalDatum> table = mapEntry.getValue();
final BQSRKeyManager keyManager = mapEntry.getKey();
final Map<Long, RecalDatum> table = mapEntry.getValue();
final List<Long> bitKeys = keyManager.longsFromAllKeys(key, errorModel); // calculate the shift in quality due to the read group
switch(keyManager.getRequiredCovariates().size()) {
switch(keyManager.getNumRequiredCovariates()) {
case 1: // this is the ReadGroup table
if (bitKeys.size() > 1)
throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION);
@ -149,7 +149,7 @@ public class BaseRecalibration {
}
break;
case 2:
if (keyManager.getOptionalCovariates().isEmpty()) { // this is the QualityScore table
if (keyManager.getNumOptionalCovariates() == 0) { // this is the QualityScore table
if (bitKeys.size() > 1)
throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION);
@ -160,10 +160,10 @@ public class BaseRecalibration {
}
}
else { // this is the table with all the covariates
for (Long k : bitKeys) {
for (final Long k : bitKeys) {
final RecalDatum empiricalQualCO = table.get(k);
if (empiricalQualCO != null) {
double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
}
}