Wow, apparently it's way, way less efficient to iterate over Java Lists than native arrays. With this change and the bit fiddling, Ryan's 10-day test case now runs in 1 day. More to come.
This commit is contained in:
parent
a96c5da884
commit
1da3e43679
|
|
@ -27,10 +27,10 @@ import java.util.*;
|
|||
*/
|
||||
public class BQSRKeyManager {
|
||||
|
||||
private final List<Covariate> requiredCovariates;
|
||||
private final List<Covariate> optionalCovariates;
|
||||
private final List<RequiredCovariateInfo> requiredCovariatesInfo;
|
||||
private final List<OptionalCovariateInfo> optionalCovariatesInfo;
|
||||
private final Covariate[] requiredCovariates;
|
||||
private final Covariate[] optionalCovariates;
|
||||
private final RequiredCovariateInfo[] requiredCovariatesInfo;
|
||||
private final OptionalCovariateInfo[] optionalCovariatesInfo;
|
||||
private final Map<String, Short> covariateNameToIDMap;
|
||||
|
||||
private int nRequiredBits; // Number of bits used to represent the required covariates
|
||||
|
|
@ -49,17 +49,19 @@ public class BQSRKeyManager {
|
|||
* @param optionalCovariates the ordered list of optional covariates
|
||||
*/
|
||||
public BQSRKeyManager(final List<Covariate> requiredCovariates, final List<Covariate> optionalCovariates) {
|
||||
this.requiredCovariates = new ArrayList<Covariate>(requiredCovariates);
|
||||
this.optionalCovariates = new ArrayList<Covariate>(optionalCovariates);
|
||||
requiredCovariatesInfo = new ArrayList<RequiredCovariateInfo>(requiredCovariates.size()); // initialize the required covariates list
|
||||
optionalCovariatesInfo = new ArrayList<OptionalCovariateInfo>(optionalCovariates.size()); // initialize the optional covariates list (size may be 0, it's okay)
|
||||
this.requiredCovariates = new Covariate[requiredCovariates.size()];
|
||||
this.optionalCovariates = new Covariate[optionalCovariates.size()];
|
||||
requiredCovariatesInfo = new RequiredCovariateInfo[requiredCovariates.size()]; // initialize the required covariates list
|
||||
optionalCovariatesInfo = new OptionalCovariateInfo[optionalCovariates.size()]; // initialize the optional covariates list (size may be 0, it's okay)
|
||||
covariateNameToIDMap = new HashMap<String, Short>(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates)
|
||||
|
||||
nRequiredBits = 0;
|
||||
for (final Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management
|
||||
for (int i = 0; i < requiredCovariates.size(); i++) { // create a list of required covariates with the extra information for key management
|
||||
final Covariate required = requiredCovariates.get(i);
|
||||
final int nBits = required.numberOfBits(); // number of bits used by this covariate
|
||||
final long mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
|
||||
requiredCovariatesInfo.add(new RequiredCovariateInfo(nBits, nRequiredBits, mask, required)); // Create an object for this required covariate
|
||||
this.requiredCovariates[i] = required;
|
||||
requiredCovariatesInfo[i] = new RequiredCovariateInfo(nBits, nRequiredBits, mask, required); // Create an object for this required covariate
|
||||
nRequiredBits += nBits;
|
||||
}
|
||||
|
||||
|
|
@ -68,9 +70,11 @@ public class BQSRKeyManager {
|
|||
|
||||
short id = 0;
|
||||
int nOptionalBits = 0;
|
||||
for (final Covariate optional : optionalCovariates) {
|
||||
for (int i = 0; i < optionalCovariates.size(); i++) {
|
||||
final Covariate optional = optionalCovariates.get(i);
|
||||
nOptionalBits = Math.max(nOptionalBits, optional.numberOfBits()); // optional covariates are represented by the number of bits needed by biggest covariate
|
||||
optionalCovariatesInfo.add(new OptionalCovariateInfo(id, optional));
|
||||
this.optionalCovariates[i] = optional;
|
||||
optionalCovariatesInfo[i] = new OptionalCovariateInfo(id, optional);
|
||||
final String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
|
||||
covariateNameToIDMap.put(covariateName, id);
|
||||
id++;
|
||||
|
|
@ -105,7 +109,7 @@ public class BQSRKeyManager {
|
|||
* @param eventType The type of event described by this keyset (e.g. mismatches, insertions, deletions)
|
||||
* @return one key in long representation per covariate
|
||||
*/
|
||||
public List<Long> longsFromAllKeys(Long[] allKeys, EventType eventType) {
|
||||
public List<Long> longsFromAllKeys(final Long[] allKeys, final EventType eventType) {
|
||||
final List<Long> allFinalKeys = new ArrayList<Long>(); // Generate one key per optional covariate
|
||||
|
||||
int covariateIndex = 0;
|
||||
|
|
@ -113,7 +117,7 @@ public class BQSRKeyManager {
|
|||
for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo)
|
||||
masterKey |= (allKeys[covariateIndex++] << infoRequired.offset);
|
||||
|
||||
final long eventKey = keyFromEvent(eventType); // create a key for the event type
|
||||
final long eventKey = keyFromEvent(eventType); // create a key for the event type
|
||||
masterKey |= (eventKey << nRequiredBits);
|
||||
|
||||
for (OptionalCovariateInfo infoOptional : optionalCovariatesInfo) {
|
||||
|
|
@ -124,13 +128,10 @@ public class BQSRKeyManager {
|
|||
long newKey = masterKey | (covariateKey << optionalCovariateOffset);
|
||||
newKey |= (infoOptional.covariateID << optionalCovariateIDOffset);
|
||||
|
||||
if ( newKey < 0 )
|
||||
System.out.println("*** " + newKey);
|
||||
|
||||
allFinalKeys.add(newKey); // add this key to the list of keys
|
||||
}
|
||||
|
||||
if (optionalCovariatesInfo.size() == 0) // special case when we have no optional covariates
|
||||
if (optionalCovariatesInfo.length == 0) // special case when we have no optional covariates
|
||||
allFinalKeys.add(masterKey);
|
||||
|
||||
return allFinalKeys;
|
||||
|
|
@ -158,20 +159,17 @@ public class BQSRKeyManager {
|
|||
final long eventKey = keyFromEvent((EventType) key[eventIndex]); // create a key for the event type
|
||||
masterKey |= (eventKey << nRequiredBits);
|
||||
|
||||
if (optionalCovariatesInfo.size() > 0) {
|
||||
final int covariateIndex = requiredCovariatesInfo.size(); // the optional covariate index in the key array
|
||||
if (optionalCovariatesInfo.length > 0) {
|
||||
final int covariateIndex = requiredCovariatesInfo.length; // the optional covariate index in the key array
|
||||
final int covariateIDIndex = covariateIndex + 1; // the optional covariate ID index is right after the optional covariate's
|
||||
final short covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index
|
||||
final OptionalCovariateInfo infoOptional = optionalCovariatesInfo.get(covariateID); // so we can get the optional covariate information
|
||||
final OptionalCovariateInfo infoOptional = optionalCovariatesInfo[covariateID]; // so we can get the optional covariate information
|
||||
|
||||
final long covariateKey = infoOptional.covariate.longFromKey(key[covariateIndex]); // convert the optional covariate key into a bitset using the covariate's interface
|
||||
masterKey |= (covariateKey << optionalCovariateOffset);
|
||||
masterKey |= (infoOptional.covariateID << optionalCovariateIDOffset);
|
||||
}
|
||||
|
||||
if ( masterKey < 0 )
|
||||
System.out.println("*** " + masterKey);
|
||||
|
||||
return masterKey;
|
||||
}
|
||||
|
||||
|
|
@ -201,10 +199,10 @@ public class BQSRKeyManager {
|
|||
objectKeys.add(info.covariate.formatKey(covariateKey)); // convert the key to object using covariate's interface
|
||||
}
|
||||
|
||||
if (optionalCovariatesInfo.size() > 0) {
|
||||
if (optionalCovariatesInfo.length > 0) {
|
||||
final Long covKey = extractKeyFromMaster(master, optionalCovariateMask, optionalCovariateOffset); // get the covariate's key
|
||||
final int covIDKey = (int)extractKeyFromMaster(master, optionalCovariateIDMask, optionalCovariateIDOffset); // get the covariate's id (to identify which covariate this is)
|
||||
Covariate covariate = optionalCovariatesInfo.get((short)covIDKey).covariate; // get the corresponding optional covariate object
|
||||
Covariate covariate = optionalCovariatesInfo[(short)covIDKey].covariate; // get the corresponding optional covariate object
|
||||
objectKeys.add(covariate.formatKey(covKey)); // add the optional covariate key to the key set
|
||||
objectKeys.add(covariate.getClass().getSimpleName().split("Covariate")[0]); // add the covariate name using the id
|
||||
}
|
||||
|
|
@ -214,14 +212,22 @@ public class BQSRKeyManager {
|
|||
return objectKeys;
|
||||
}
|
||||
|
||||
/**
 * Returns the required covariates held by this key manager.
 *
 * Note: the backing field itself is returned, not a copy, so changes made by
 * the caller to the returned object are visible to this key manager.
 *
 * @return the required covariates
 */
public List<Covariate> getRequiredCovariates() {
|
||||
public Covariate[] getRequiredCovariates() {
|
||||
return requiredCovariates;
|
||||
}
|
||||
|
||||
/**
 * Returns the optional covariates held by this key manager.
 *
 * Note: the backing field itself is returned, not a copy, so changes made by
 * the caller to the returned object are visible to this key manager.
 *
 * @return the optional covariates (may be empty)
 */
public List<Covariate> getOptionalCovariates() {
|
||||
public Covariate[] getOptionalCovariates() {
|
||||
return optionalCovariates;
|
||||
}
|
||||
|
||||
/**
 * Returns how many required covariates this key manager holds.
 *
 * Lets callers query the count without fetching the covariates array itself.
 *
 * @return the length of the required covariates array
 */
public int getNumRequiredCovariates() {
|
||||
return requiredCovariates.length;
|
||||
}
|
||||
|
||||
/**
 * Returns how many optional covariates this key manager holds.
 *
 * Lets callers query the count without fetching the covariates array itself.
 *
 * @return the length of the optional covariates array (0 when there are none)
 */
public int getNumOptionalCovariates() {
|
||||
return optionalCovariates.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a mask for the requested covariate to extract the relevant key from a combined master key
|
||||
*
|
||||
|
|
@ -261,22 +267,22 @@ public class BQSRKeyManager {
|
|||
if (this == other)
|
||||
return true;
|
||||
|
||||
if (requiredCovariatesInfo.size() != other.requiredCovariatesInfo.size() ||
|
||||
optionalCovariatesInfo.size() != other.optionalCovariatesInfo.size())
|
||||
if (requiredCovariatesInfo.length != other.requiredCovariatesInfo.length ||
|
||||
optionalCovariatesInfo.length != other.optionalCovariatesInfo.length)
|
||||
return false;
|
||||
|
||||
for (int i = 0; i < requiredCovariates.size(); i++) {
|
||||
Covariate myRequiredCovariate = requiredCovariates.get(i);
|
||||
Covariate otherRequiredCovariate = other.requiredCovariates.get(i);
|
||||
for (int i = 0; i < requiredCovariates.length; i++) {
|
||||
Covariate myRequiredCovariate = requiredCovariates[i];
|
||||
Covariate otherRequiredCovariate = other.requiredCovariates[i];
|
||||
String thisName = myRequiredCovariate.getClass().getSimpleName();
|
||||
String otherName = otherRequiredCovariate.getClass().getSimpleName();
|
||||
if (!thisName.equals(otherName))
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < optionalCovariates.size(); i++) {
|
||||
Covariate myOptionalCovariate = optionalCovariates.get(i);
|
||||
Covariate otherOptionalCovariate = other.optionalCovariates.get(i);
|
||||
for (int i = 0; i < optionalCovariates.length; i++) {
|
||||
Covariate myOptionalCovariate = optionalCovariates[i];
|
||||
Covariate otherOptionalCovariate = other.optionalCovariates[i];
|
||||
String thisName = myOptionalCovariate.getClass().getSimpleName();
|
||||
String otherName = otherOptionalCovariate.getClass().getSimpleName();
|
||||
if (!thisName.equals(otherName))
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ public class QuantizationInfo {
|
|||
Map<Long, RecalDatum> qualTable = null; // look for the quality score table
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
|
||||
BQSRKeyManager keyManager = entry.getKey();
|
||||
if (keyManager.getRequiredCovariates().size() == 2) // it should be the only one with 2 required covaraites
|
||||
if (keyManager.getNumRequiredCovariates() == 2) // it should be the only one with 2 required covariates
|
||||
qualTable = entry.getValue();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -241,8 +241,8 @@ public class RecalDataManager {
|
|||
|
||||
final boolean isReadGroupTable = tableIndex == 0; // special case for the read group table so we can print the extra column it needs.
|
||||
|
||||
final List<Covariate> requiredList = keyManager.getRequiredCovariates(); // ask the key manager what required covariates were used in this recal table
|
||||
final List<Covariate> optionalList = keyManager.getOptionalCovariates(); // ask the key manager what optional covariates were used in this recal table
|
||||
final Covariate[] requiredList = keyManager.getRequiredCovariates(); // ask the key manager what required covariates were used in this recal table
|
||||
final Covariate[] optionalList = keyManager.getOptionalCovariates(); // ask the key manager what optional covariates were used in this recal table
|
||||
|
||||
final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
|
||||
|
||||
|
|
@ -251,7 +251,7 @@ public class RecalDataManager {
|
|||
columnNames.add(new Pair<String,String>(name, "%s")); // save the required covariate name so we can reference it in the future
|
||||
}
|
||||
|
||||
if (optionalList.size() > 0) {
|
||||
if (optionalList.length > 0) {
|
||||
columnNames.add(covariateValue);
|
||||
columnNames.add(covariateName);
|
||||
}
|
||||
|
|
@ -362,12 +362,12 @@ public class RecalDataManager {
|
|||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> tableEntry : map.entrySet()) {
|
||||
final BQSRKeyManager keyManager = tableEntry.getKey();
|
||||
|
||||
if (keyManager.getOptionalCovariates().size() > 0) { // initialize with the 'all covariates' table
|
||||
if (keyManager.getNumOptionalCovariates() > 0) { // initialize with the 'all covariates' table
|
||||
// create a key manager for the delta table
|
||||
final List<Covariate> requiredCovariates = Arrays.asList(keyManager.getRequiredCovariates().get(0)); // include the read group covariate as the only required covariate
|
||||
final List<Covariate> requiredCovariates = Arrays.asList(keyManager.getRequiredCovariates()[0]); // include the read group covariate as the only required covariate
|
||||
final List<Covariate> optionalCovariates = new ArrayList<Covariate>();
|
||||
optionalCovariates.add(keyManager.getRequiredCovariates().get(1)); // include the quality score covariate as an optional covariate
|
||||
optionalCovariates.addAll(keyManager.getOptionalCovariates()); // include all optional covariates
|
||||
optionalCovariates.add(keyManager.getRequiredCovariates()[1]); // include the quality score covariate as an optional covariate
|
||||
optionalCovariates.addAll(Arrays.asList(keyManager.getOptionalCovariates())); // include all optional covariates
|
||||
deltaKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initialize the key manager
|
||||
}
|
||||
}
|
||||
|
|
@ -379,7 +379,7 @@ public class RecalDataManager {
|
|||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> tableEntry : map.entrySet()) {
|
||||
final BQSRKeyManager keyManager = tableEntry.getKey();
|
||||
|
||||
if (keyManager.getRequiredCovariates().size() == 2 && keyManager.getOptionalCovariates().isEmpty()) { // look for the QualityScore table
|
||||
if (keyManager.getNumRequiredCovariates() == 2 && keyManager.getNumOptionalCovariates() == 0) { // look for the QualityScore table
|
||||
final Map<Long, RecalDatum> table = tableEntry.getValue();
|
||||
|
||||
// add the quality score table to the delta table
|
||||
|
|
@ -397,7 +397,7 @@ public class RecalDataManager {
|
|||
}
|
||||
}
|
||||
|
||||
else if (keyManager.getOptionalCovariates().size() > 0) { // look for the optional covariates table
|
||||
else if (keyManager.getNumOptionalCovariates() > 0) { // look for the optional covariates table
|
||||
final Map<Long, RecalDatum> table = tableEntry.getValue();
|
||||
|
||||
// add the optional covariates to the delta table
|
||||
|
|
|
|||
|
|
@ -41,11 +41,13 @@ import java.util.*;
|
|||
*/
|
||||
|
||||
public class BaseRecalibration {
|
||||
private final static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code";
|
||||
private final static String TOO_MANY_KEYS_EXCEPTION = "There should only be one key for the RG collapsed table, something went wrong here";
|
||||
|
||||
private QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
|
||||
private LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
|
||||
private ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation
|
||||
|
||||
|
||||
/**
|
||||
* Constructor using a GATK Report file
|
||||
*
|
||||
|
|
@ -121,9 +123,7 @@ public class BaseRecalibration {
|
|||
* @param errorModel the event type
|
||||
* @return A recalibrated quality score as a byte
|
||||
*/
|
||||
protected byte performSequentialQualityCalculation(Long[] key, EventType errorModel) {
|
||||
final String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code";
|
||||
final String TOO_MANY_KEYS_EXCEPTION = "There should only be one key for the RG collapsed table, something went wrong here";
|
||||
protected byte performSequentialQualityCalculation(final Long[] key, final EventType errorModel) {
|
||||
|
||||
final byte qualFromRead = (byte)(long)key[1];
|
||||
|
||||
|
|
@ -132,11 +132,11 @@ public class BaseRecalibration {
|
|||
double deltaQCovariates = 0.0;
|
||||
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> mapEntry : keysAndTablesMap.entrySet()) {
|
||||
BQSRKeyManager keyManager = mapEntry.getKey();
|
||||
Map<Long, RecalDatum> table = mapEntry.getValue();
|
||||
final BQSRKeyManager keyManager = mapEntry.getKey();
|
||||
final Map<Long, RecalDatum> table = mapEntry.getValue();
|
||||
|
||||
final List<Long> bitKeys = keyManager.longsFromAllKeys(key, errorModel); // calculate the shift in quality due to the read group
|
||||
switch(keyManager.getRequiredCovariates().size()) {
|
||||
switch(keyManager.getNumRequiredCovariates()) {
|
||||
case 1: // this is the ReadGroup table
|
||||
if (bitKeys.size() > 1)
|
||||
throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION);
|
||||
|
|
@ -149,7 +149,7 @@ public class BaseRecalibration {
|
|||
}
|
||||
break;
|
||||
case 2:
|
||||
if (keyManager.getOptionalCovariates().isEmpty()) { // this is the QualityScore table
|
||||
if (keyManager.getNumOptionalCovariates() == 0) { // this is the QualityScore table
|
||||
if (bitKeys.size() > 1)
|
||||
throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION);
|
||||
|
||||
|
|
@ -160,10 +160,10 @@ public class BaseRecalibration {
|
|||
}
|
||||
}
|
||||
else { // this is the table with all the covariates
|
||||
for (Long k : bitKeys) {
|
||||
for (final Long k : bitKeys) {
|
||||
final RecalDatum empiricalQualCO = table.get(k);
|
||||
if (empiricalQualCO != null) {
|
||||
double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
|
||||
final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
|
||||
deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue