Wow, apparently it's way, way less efficient to iterate over Java Lists than native arrays. With this change and the bit fiddling, Ryan's 10-day test case now runs in 1 day. More to come.

This commit is contained in:
Eric Banks 2012-06-12 13:32:56 -04:00
parent a96c5da884
commit 1da3e43679
4 changed files with 62 additions and 56 deletions

View File

@ -27,10 +27,10 @@ import java.util.*;
*/ */
public class BQSRKeyManager { public class BQSRKeyManager {
private final List<Covariate> requiredCovariates; private final Covariate[] requiredCovariates;
private final List<Covariate> optionalCovariates; private final Covariate[] optionalCovariates;
private final List<RequiredCovariateInfo> requiredCovariatesInfo; private final RequiredCovariateInfo[] requiredCovariatesInfo;
private final List<OptionalCovariateInfo> optionalCovariatesInfo; private final OptionalCovariateInfo[] optionalCovariatesInfo;
private final Map<String, Short> covariateNameToIDMap; private final Map<String, Short> covariateNameToIDMap;
private int nRequiredBits; // Number of bits used to represent the required covariates private int nRequiredBits; // Number of bits used to represent the required covariates
@ -49,17 +49,19 @@ public class BQSRKeyManager {
* @param optionalCovariates the ordered list of optional covariates * @param optionalCovariates the ordered list of optional covariates
*/ */
public BQSRKeyManager(final List<Covariate> requiredCovariates, final List<Covariate> optionalCovariates) { public BQSRKeyManager(final List<Covariate> requiredCovariates, final List<Covariate> optionalCovariates) {
this.requiredCovariates = new ArrayList<Covariate>(requiredCovariates); this.requiredCovariates = new Covariate[requiredCovariates.size()];
this.optionalCovariates = new ArrayList<Covariate>(optionalCovariates); this.optionalCovariates = new Covariate[optionalCovariates.size()];
requiredCovariatesInfo = new ArrayList<RequiredCovariateInfo>(requiredCovariates.size()); // initialize the required covariates list requiredCovariatesInfo = new RequiredCovariateInfo[requiredCovariates.size()]; // initialize the required covariates list
optionalCovariatesInfo = new ArrayList<OptionalCovariateInfo>(optionalCovariates.size()); // initialize the optional covariates list (size may be 0, it's okay) optionalCovariatesInfo = new OptionalCovariateInfo[optionalCovariates.size()]; // initialize the optional covariates list (size may be 0, it's okay)
covariateNameToIDMap = new HashMap<String, Short>(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates) covariateNameToIDMap = new HashMap<String, Short>(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates)
nRequiredBits = 0; nRequiredBits = 0;
for (final Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management for (int i = 0; i < requiredCovariates.size(); i++) { // create a list of required covariates with the extra information for key management
final Covariate required = requiredCovariates.get(i);
final int nBits = required.numberOfBits(); // number of bits used by this covariate final int nBits = required.numberOfBits(); // number of bits used by this covariate
final long mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate final long mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
requiredCovariatesInfo.add(new RequiredCovariateInfo(nBits, nRequiredBits, mask, required)); // Create an object for this required covariate this.requiredCovariates[i] = required;
requiredCovariatesInfo[i] = new RequiredCovariateInfo(nBits, nRequiredBits, mask, required); // Create an object for this required covariate
nRequiredBits += nBits; nRequiredBits += nBits;
} }
@ -68,9 +70,11 @@ public class BQSRKeyManager {
short id = 0; short id = 0;
int nOptionalBits = 0; int nOptionalBits = 0;
for (final Covariate optional : optionalCovariates) { for (int i = 0; i < optionalCovariates.size(); i++) {
final Covariate optional = optionalCovariates.get(i);
nOptionalBits = Math.max(nOptionalBits, optional.numberOfBits()); // optional covariates are represented by the number of bits needed by biggest covariate nOptionalBits = Math.max(nOptionalBits, optional.numberOfBits()); // optional covariates are represented by the number of bits needed by biggest covariate
optionalCovariatesInfo.add(new OptionalCovariateInfo(id, optional)); this.optionalCovariates[i] = optional;
optionalCovariatesInfo[i] = new OptionalCovariateInfo(id, optional);
final String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport final String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
covariateNameToIDMap.put(covariateName, id); covariateNameToIDMap.put(covariateName, id);
id++; id++;
@ -105,7 +109,7 @@ public class BQSRKeyManager {
* @param eventType The type of event described by this keyset (e.g. mismatches, insertions, deletions) * @param eventType The type of event described by this keyset (e.g. mismatches, insertions, deletions)
* @return one key in long representation per covariate * @return one key in long representation per covariate
*/ */
public List<Long> longsFromAllKeys(Long[] allKeys, EventType eventType) { public List<Long> longsFromAllKeys(final Long[] allKeys, final EventType eventType) {
final List<Long> allFinalKeys = new ArrayList<Long>(); // Generate one key per optional covariate final List<Long> allFinalKeys = new ArrayList<Long>(); // Generate one key per optional covariate
int covariateIndex = 0; int covariateIndex = 0;
@ -113,7 +117,7 @@ public class BQSRKeyManager {
for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo) for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo)
masterKey |= (allKeys[covariateIndex++] << infoRequired.offset); masterKey |= (allKeys[covariateIndex++] << infoRequired.offset);
final long eventKey = keyFromEvent(eventType); // create a key for the event type final long eventKey = keyFromEvent(eventType); // create a key for the event type
masterKey |= (eventKey << nRequiredBits); masterKey |= (eventKey << nRequiredBits);
for (OptionalCovariateInfo infoOptional : optionalCovariatesInfo) { for (OptionalCovariateInfo infoOptional : optionalCovariatesInfo) {
@ -124,13 +128,10 @@ public class BQSRKeyManager {
long newKey = masterKey | (covariateKey << optionalCovariateOffset); long newKey = masterKey | (covariateKey << optionalCovariateOffset);
newKey |= (infoOptional.covariateID << optionalCovariateIDOffset); newKey |= (infoOptional.covariateID << optionalCovariateIDOffset);
if ( newKey < 0 )
System.out.println("*** " + newKey);
allFinalKeys.add(newKey); // add this key to the list of keys allFinalKeys.add(newKey); // add this key to the list of keys
} }
if (optionalCovariatesInfo.size() == 0) // special case when we have no optional covariates if (optionalCovariatesInfo.length == 0) // special case when we have no optional covariates
allFinalKeys.add(masterKey); allFinalKeys.add(masterKey);
return allFinalKeys; return allFinalKeys;
@ -158,20 +159,17 @@ public class BQSRKeyManager {
final long eventKey = keyFromEvent((EventType) key[eventIndex]); // create a key for the event type final long eventKey = keyFromEvent((EventType) key[eventIndex]); // create a key for the event type
masterKey |= (eventKey << nRequiredBits); masterKey |= (eventKey << nRequiredBits);
if (optionalCovariatesInfo.size() > 0) { if (optionalCovariatesInfo.length > 0) {
final int covariateIndex = requiredCovariatesInfo.size(); // the optional covariate index in the key array final int covariateIndex = requiredCovariatesInfo.length; // the optional covariate index in the key array
final int covariateIDIndex = covariateIndex + 1; // the optional covariate ID index is right after the optional covariate's final int covariateIDIndex = covariateIndex + 1; // the optional covariate ID index is right after the optional covariate's
final short covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index final short covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index
final OptionalCovariateInfo infoOptional = optionalCovariatesInfo.get(covariateID); // so we can get the optional covariate information final OptionalCovariateInfo infoOptional = optionalCovariatesInfo[covariateID]; // so we can get the optional covariate information
final long covariateKey = infoOptional.covariate.longFromKey(key[covariateIndex]); // convert the optional covariate key into a bitset using the covariate's interface final long covariateKey = infoOptional.covariate.longFromKey(key[covariateIndex]); // convert the optional covariate key into a bitset using the covariate's interface
masterKey |= (covariateKey << optionalCovariateOffset); masterKey |= (covariateKey << optionalCovariateOffset);
masterKey |= (infoOptional.covariateID << optionalCovariateIDOffset); masterKey |= (infoOptional.covariateID << optionalCovariateIDOffset);
} }
if ( masterKey < 0 )
System.out.println("*** " + masterKey);
return masterKey; return masterKey;
} }
@ -201,10 +199,10 @@ public class BQSRKeyManager {
objectKeys.add(info.covariate.formatKey(covariateKey)); // convert the key to object using covariate's interface objectKeys.add(info.covariate.formatKey(covariateKey)); // convert the key to object using covariate's interface
} }
if (optionalCovariatesInfo.size() > 0) { if (optionalCovariatesInfo.length > 0) {
final Long covKey = extractKeyFromMaster(master, optionalCovariateMask, optionalCovariateOffset); // get the covariate's key final Long covKey = extractKeyFromMaster(master, optionalCovariateMask, optionalCovariateOffset); // get the covariate's key
final int covIDKey = (int)extractKeyFromMaster(master, optionalCovariateIDMask, optionalCovariateIDOffset); // get the covariate's id (to identify which covariate this is) final int covIDKey = (int)extractKeyFromMaster(master, optionalCovariateIDMask, optionalCovariateIDOffset); // get the covariate's id (to identify which covariate this is)
Covariate covariate = optionalCovariatesInfo.get((short)covIDKey).covariate; // get the corresponding optional covariate object Covariate covariate = optionalCovariatesInfo[(short)covIDKey].covariate; // get the corresponding optional covariate object
objectKeys.add(covariate.formatKey(covKey)); // add the optional covariate key to the key set objectKeys.add(covariate.formatKey(covKey)); // add the optional covariate key to the key set
objectKeys.add(covariate.getClass().getSimpleName().split("Covariate")[0]); // add the covariate name using the id objectKeys.add(covariate.getClass().getSimpleName().split("Covariate")[0]); // add the covariate name using the id
} }
@ -214,14 +212,22 @@ public class BQSRKeyManager {
return objectKeys; return objectKeys;
} }
public List<Covariate> getRequiredCovariates() { public Covariate[] getRequiredCovariates() {
return requiredCovariates; return requiredCovariates;
} }
public List<Covariate> getOptionalCovariates() { public Covariate[] getOptionalCovariates() {
return optionalCovariates; return optionalCovariates;
} }
public int getNumRequiredCovariates() {
return requiredCovariates.length;
}
public int getNumOptionalCovariates() {
return optionalCovariates.length;
}
/** /**
* Creates a mask for the requested covariate to extract the relevant key from a combined master key * Creates a mask for the requested covariate to extract the relevant key from a combined master key
* *
@ -261,22 +267,22 @@ public class BQSRKeyManager {
if (this == other) if (this == other)
return true; return true;
if (requiredCovariatesInfo.size() != other.requiredCovariatesInfo.size() || if (requiredCovariatesInfo.length != other.requiredCovariatesInfo.length ||
optionalCovariatesInfo.size() != other.optionalCovariatesInfo.size()) optionalCovariatesInfo.length != other.optionalCovariatesInfo.length)
return false; return false;
for (int i = 0; i < requiredCovariates.size(); i++) { for (int i = 0; i < requiredCovariates.length; i++) {
Covariate myRequiredCovariate = requiredCovariates.get(i); Covariate myRequiredCovariate = requiredCovariates[i];
Covariate otherRequiredCovariate = other.requiredCovariates.get(i); Covariate otherRequiredCovariate = other.requiredCovariates[i];
String thisName = myRequiredCovariate.getClass().getSimpleName(); String thisName = myRequiredCovariate.getClass().getSimpleName();
String otherName = otherRequiredCovariate.getClass().getSimpleName(); String otherName = otherRequiredCovariate.getClass().getSimpleName();
if (!thisName.equals(otherName)) if (!thisName.equals(otherName))
return false; return false;
} }
for (int i = 0; i < optionalCovariates.size(); i++) { for (int i = 0; i < optionalCovariates.length; i++) {
Covariate myOptionalCovariate = optionalCovariates.get(i); Covariate myOptionalCovariate = optionalCovariates[i];
Covariate otherOptionalCovariate = other.optionalCovariates.get(i); Covariate otherOptionalCovariate = other.optionalCovariates[i];
String thisName = myOptionalCovariate.getClass().getSimpleName(); String thisName = myOptionalCovariate.getClass().getSimpleName();
String otherName = otherOptionalCovariate.getClass().getSimpleName(); String otherName = otherOptionalCovariate.getClass().getSimpleName();
if (!thisName.equals(otherName)) if (!thisName.equals(otherName))

View File

@ -38,7 +38,7 @@ public class QuantizationInfo {
Map<Long, RecalDatum> qualTable = null; // look for the quality score table Map<Long, RecalDatum> qualTable = null; // look for the quality score table
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> entry : keysAndTablesMap.entrySet()) { for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
BQSRKeyManager keyManager = entry.getKey(); BQSRKeyManager keyManager = entry.getKey();
if (keyManager.getRequiredCovariates().size() == 2) // it should be the only one with 2 required covaraites if (keyManager.getNumRequiredCovariates() == 2) // it should be the only one with 2 required covariates
qualTable = entry.getValue(); qualTable = entry.getValue();
} }

View File

@ -241,8 +241,8 @@ public class RecalDataManager {
final boolean isReadGroupTable = tableIndex == 0; // special case for the read group table so we can print the extra column it needs. final boolean isReadGroupTable = tableIndex == 0; // special case for the read group table so we can print the extra column it needs.
final List<Covariate> requiredList = keyManager.getRequiredCovariates(); // ask the key manager what required covariates were used in this recal table final Covariate[] requiredList = keyManager.getRequiredCovariates(); // ask the key manager what required covariates were used in this recal table
final List<Covariate> optionalList = keyManager.getOptionalCovariates(); // ask the key manager what optional covariates were used in this recal table final Covariate[] optionalList = keyManager.getOptionalCovariates(); // ask the key manager what optional covariates were used in this recal table
final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
@ -251,7 +251,7 @@ public class RecalDataManager {
columnNames.add(new Pair<String,String>(name, "%s")); // save the required covariate name so we can reference it in the future columnNames.add(new Pair<String,String>(name, "%s")); // save the required covariate name so we can reference it in the future
} }
if (optionalList.size() > 0) { if (optionalList.length > 0) {
columnNames.add(covariateValue); columnNames.add(covariateValue);
columnNames.add(covariateName); columnNames.add(covariateName);
} }
@ -362,12 +362,12 @@ public class RecalDataManager {
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> tableEntry : map.entrySet()) { for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> tableEntry : map.entrySet()) {
final BQSRKeyManager keyManager = tableEntry.getKey(); final BQSRKeyManager keyManager = tableEntry.getKey();
if (keyManager.getOptionalCovariates().size() > 0) { // initialize with the 'all covariates' table if (keyManager.getNumOptionalCovariates() > 0) { // initialize with the 'all covariates' table
// create a key manager for the delta table // create a key manager for the delta table
final List<Covariate> requiredCovariates = Arrays.asList(keyManager.getRequiredCovariates().get(0)); // include the read group covariate as the only required covariate final List<Covariate> requiredCovariates = Arrays.asList(keyManager.getRequiredCovariates()[0]); // include the read group covariate as the only required covariate
final List<Covariate> optionalCovariates = new ArrayList<Covariate>(); final List<Covariate> optionalCovariates = new ArrayList<Covariate>();
optionalCovariates.add(keyManager.getRequiredCovariates().get(1)); // include the quality score covariate as an optional covariate optionalCovariates.add(keyManager.getRequiredCovariates()[1]); // include the quality score covariate as an optional covariate
optionalCovariates.addAll(keyManager.getOptionalCovariates()); // include all optional covariates optionalCovariates.addAll(Arrays.asList(keyManager.getOptionalCovariates())); // include all optional covariates
deltaKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initialize the key manager deltaKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initialize the key manager
} }
} }
@ -379,7 +379,7 @@ public class RecalDataManager {
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> tableEntry : map.entrySet()) { for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> tableEntry : map.entrySet()) {
final BQSRKeyManager keyManager = tableEntry.getKey(); final BQSRKeyManager keyManager = tableEntry.getKey();
if (keyManager.getRequiredCovariates().size() == 2 && keyManager.getOptionalCovariates().isEmpty()) { // look for the QualityScore table if (keyManager.getNumRequiredCovariates() == 2 && keyManager.getNumOptionalCovariates() == 0) { // look for the QualityScore table
final Map<Long, RecalDatum> table = tableEntry.getValue(); final Map<Long, RecalDatum> table = tableEntry.getValue();
// add the quality score table to the delta table // add the quality score table to the delta table
@ -397,7 +397,7 @@ public class RecalDataManager {
} }
} }
else if (keyManager.getOptionalCovariates().size() > 0) { // look for the optional covariates table else if (keyManager.getNumOptionalCovariates() > 0) { // look for the optional covariates table
final Map<Long, RecalDatum> table = tableEntry.getValue(); final Map<Long, RecalDatum> table = tableEntry.getValue();
// add the optional covariates to the delta table // add the optional covariates to the delta table

View File

@ -41,11 +41,13 @@ import java.util.*;
*/ */
public class BaseRecalibration { public class BaseRecalibration {
private final static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code";
private final static String TOO_MANY_KEYS_EXCEPTION = "There should only be one key for the RG collapsed table, something went wrong here";
private QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done) private QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
private LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager private LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
private ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation private ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation
/** /**
* Constructor using a GATK Report file * Constructor using a GATK Report file
* *
@ -121,9 +123,7 @@ public class BaseRecalibration {
* @param errorModel the event type * @param errorModel the event type
* @return A recalibrated quality score as a byte * @return A recalibrated quality score as a byte
*/ */
protected byte performSequentialQualityCalculation(Long[] key, EventType errorModel) { protected byte performSequentialQualityCalculation(final Long[] key, final EventType errorModel) {
final String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code";
final String TOO_MANY_KEYS_EXCEPTION = "There should only be one key for the RG collapsed table, something went wrong here";
final byte qualFromRead = (byte)(long)key[1]; final byte qualFromRead = (byte)(long)key[1];
@ -132,11 +132,11 @@ public class BaseRecalibration {
double deltaQCovariates = 0.0; double deltaQCovariates = 0.0;
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> mapEntry : keysAndTablesMap.entrySet()) { for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> mapEntry : keysAndTablesMap.entrySet()) {
BQSRKeyManager keyManager = mapEntry.getKey(); final BQSRKeyManager keyManager = mapEntry.getKey();
Map<Long, RecalDatum> table = mapEntry.getValue(); final Map<Long, RecalDatum> table = mapEntry.getValue();
final List<Long> bitKeys = keyManager.longsFromAllKeys(key, errorModel); // calculate the shift in quality due to the read group final List<Long> bitKeys = keyManager.longsFromAllKeys(key, errorModel); // calculate the shift in quality due to the read group
switch(keyManager.getRequiredCovariates().size()) { switch(keyManager.getNumRequiredCovariates()) {
case 1: // this is the ReadGroup table case 1: // this is the ReadGroup table
if (bitKeys.size() > 1) if (bitKeys.size() > 1)
throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION); throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION);
@ -149,7 +149,7 @@ public class BaseRecalibration {
} }
break; break;
case 2: case 2:
if (keyManager.getOptionalCovariates().isEmpty()) { // this is the QualityScore table if (keyManager.getNumOptionalCovariates() == 0) { // this is the QualityScore table
if (bitKeys.size() > 1) if (bitKeys.size() > 1)
throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION); throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION);
@ -160,10 +160,10 @@ public class BaseRecalibration {
} }
} }
else { // this is the table with all the covariates else { // this is the table with all the covariates
for (Long k : bitKeys) { for (final Long k : bitKeys) {
final RecalDatum empiricalQualCO = table.get(k); final RecalDatum empiricalQualCO = table.get(k);
if (empiricalQualCO != null) { if (empiricalQualCO != null) {
double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality(); final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported)); deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
} }
} }