The next round of BQSR optimizations: no more Long[] array creation
This commit is contained in:
parent
cd2074b1dc
commit
29a74908bb
|
|
@ -92,49 +92,33 @@ public class BQSRKeyManager {
|
|||
}
|
||||
|
||||
/**
|
||||
* Generates one key per optional covariate.
|
||||
* Generates one key given the optional covariate (or none if it is null)
|
||||
*
|
||||
* Keys include all required covariates, the standard covariate and the event type.
|
||||
*
|
||||
* Example allKeys:
|
||||
* RG, QUAL, CYCLE, CONTEXT
|
||||
*
|
||||
* List of BitSets returned by this example (given eventType):
|
||||
* RG, QUAL, CYCLE, EVENT
|
||||
* RG, QUAL, CONTEXT, EVENT
|
||||
*
|
||||
* Note: If there are no optional covariates, only one bitset key will be returned with all the required covariates and the event type
|
||||
*
|
||||
* @param allKeys The keys in long representation for each covariate
|
||||
* @param eventType The type of event described by this keyset (e.g. mismatches, insertions, deletions)
|
||||
* @return one key in long representation per covariate
|
||||
* @param allKeys The keys in long representation for each covariate (includes all optional covariates, not just the one requested)
|
||||
* @param eventType The type of event described by this keyset (e.g. mismatches, insertions, deletions)
|
||||
* @return one key in long representation
|
||||
*/
|
||||
public Long[] longsFromAllKeys(final Long[] allKeys, final EventType eventType) {
|
||||
final ArrayList<Long> allFinalKeys = new ArrayList<Long>(); // Generate one key per optional covariate
|
||||
public long createMasterKey(final long[] allKeys, final EventType eventType, final int optionalCovariateIndex) {
|
||||
|
||||
int covariateIndex = 0;
|
||||
int keyIndex = 0;
|
||||
long masterKey = 0L; // This will be a master key holding all the required keys, to replicate later on
|
||||
for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo)
|
||||
masterKey |= (allKeys[covariateIndex++] << infoRequired.offset);
|
||||
masterKey |= (allKeys[keyIndex++] << infoRequired.offset);
|
||||
|
||||
final long eventKey = keyFromEvent(eventType); // create a key for the event type
|
||||
masterKey |= (eventKey << nRequiredBits);
|
||||
|
||||
for (int i = 0; i < optionalCovariatesInfo.length; i++) {
|
||||
final Long covariateKey = allKeys[covariateIndex++];
|
||||
if (covariateKey == null)
|
||||
continue; // do not add nulls to the final set of keys.
|
||||
|
||||
long newKey = masterKey | (covariateKey << optionalCovariateOffset);
|
||||
newKey |= (optionalCovariatesInfo[i].covariateID << optionalCovariateIDOffset);
|
||||
|
||||
allFinalKeys.add(newKey); // add this key to the list of keys
|
||||
if (optionalCovariateIndex >= 0 && optionalCovariateIndex < optionalCovariates.length) {
|
||||
final Long covariateKey = allKeys[keyIndex + optionalCovariateIndex];
|
||||
if (covariateKey != null) { // do not add nulls to the final set of keys
|
||||
masterKey |= (covariateKey << optionalCovariateOffset);
|
||||
masterKey |= (optionalCovariatesInfo[optionalCovariateIndex].covariateID << optionalCovariateIDOffset);
|
||||
}
|
||||
}
|
||||
|
||||
if (optionalCovariatesInfo.length == 0) // special case when we have no optional covariates
|
||||
allFinalKeys.add(masterKey);
|
||||
|
||||
return allFinalKeys.toArray(new Long[allFinalKeys.size()]);
|
||||
return masterKey;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -248,14 +232,14 @@ public class BQSRKeyManager {
|
|||
}
|
||||
|
||||
// cache the key representing an event since it's otherwise created a massive amount of times
|
||||
private static final Map<EventType, Long> eventTypeCache = new HashMap<EventType, Long>(EventType.values().length); // event IDs must be longs so that bit-fiddling works
|
||||
private static final long[] eventTypeCache = new long[EventType.values().length]; // event IDs must be longs so that bit-fiddling works
|
||||
static {
|
||||
for (final EventType eventType : EventType.values())
|
||||
eventTypeCache.put(eventType, (long)eventType.index);
|
||||
eventTypeCache[eventType.index] = (long)eventType.index;
|
||||
}
|
||||
|
||||
private Long keyFromEvent(final EventType eventType) {
|
||||
return eventTypeCache.get(eventType);
|
||||
private long keyFromEvent(final EventType eventType) {
|
||||
return eventTypeCache[eventType.index];
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -65,9 +65,9 @@ public class ContextCovariate implements StandardCovariate {
|
|||
@Override
|
||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||
final int l = read.getReadLength();
|
||||
final Long[] mismatches = new Long[l];
|
||||
final Long[] insertions = new Long[l];
|
||||
final Long[] deletions = new Long[l];
|
||||
final long[] mismatches = new long[l];
|
||||
final long[] insertions = new long[l];
|
||||
final long[] deletions = new long[l];
|
||||
|
||||
final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context
|
||||
|
||||
|
|
@ -97,15 +97,15 @@ public class ContextCovariate implements StandardCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String formatKey(final Long key) {
|
||||
if (key == null) // this can only happen in test routines because we do not propagate null keys to the csv file
|
||||
public String formatKey(final long key) {
|
||||
if (key == -1) // this can only happen in test routines because we do not propagate null keys to the csv file
|
||||
return null;
|
||||
|
||||
return contextFromKey(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long longFromKey(Object key) {
|
||||
public long longFromKey(Object key) {
|
||||
return longFrom((String) key);
|
||||
}
|
||||
|
||||
|
|
@ -122,8 +122,8 @@ public class ContextCovariate implements StandardCovariate {
|
|||
* @param contextSize context size to use building the context
|
||||
* @return the key representing the context
|
||||
*/
|
||||
private Long contextWith(final byte[] bases, final int offset, final int contextSize) {
|
||||
Long result = null;
|
||||
private long contextWith(final byte[] bases, final int offset, final int contextSize) {
|
||||
long result = -1;
|
||||
final int start = offset - contextSize + 1;
|
||||
if (start >= 0) {
|
||||
final byte[] context = Arrays.copyOfRange(bases, start, offset + 1);
|
||||
|
|
@ -138,10 +138,10 @@ public class ContextCovariate implements StandardCovariate {
|
|||
*
|
||||
* @param array any array
|
||||
*/
|
||||
private static void reverse(final Object[] array) {
|
||||
private static void reverse(final long[] array) {
|
||||
final int arrayLength = array.length;
|
||||
for (int l = 0, r = arrayLength - 1; l < r; l++, r--) {
|
||||
final Object temp = array[l];
|
||||
final long temp = array[l];
|
||||
array[l] = array[r];
|
||||
array[r] = temp;
|
||||
}
|
||||
|
|
@ -150,12 +150,12 @@ public class ContextCovariate implements StandardCovariate {
|
|||
static final private int MAX_DNA_CONTEXT = 31; // the maximum context size (number of bases) permitted in the "long bitset" implementation of the DNA <=> BitSet conversion.
|
||||
static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1]; // keeps the memoized table with the number of combinations for each given DNA context length
|
||||
|
||||
public static Long longFrom(final String dna) {
|
||||
public static long longFrom(final String dna) {
|
||||
return keyFromContext(dna.getBytes());
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Long representation of a given dna string.
|
||||
* Creates a long representation of a given dna string.
|
||||
*
|
||||
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
|
||||
* be longer than 31 bases.
|
||||
|
|
@ -172,7 +172,7 @@ public class ContextCovariate implements StandardCovariate {
|
|||
* @param dna the dna sequence
|
||||
* @return the key representing the dna sequence
|
||||
*/
|
||||
public static Long keyFromContext(final byte[] dna) {
|
||||
public static long keyFromContext(final byte[] dna) {
|
||||
if (dna.length > MAX_DNA_CONTEXT)
|
||||
throw new ReviewedStingException(String.format("DNA Length cannot be bigger than %d. dna: %s (%d)", MAX_DNA_CONTEXT, dna, dna.length));
|
||||
|
||||
|
|
@ -227,7 +227,7 @@ public class ContextCovariate implements StandardCovariate {
|
|||
* @param key the key representing the dna sequence
|
||||
* @return the dna sequence represented by the key
|
||||
*/
|
||||
public static String contextFromKey(Long key) {
|
||||
public static String contextFromKey(long key) {
|
||||
if (key < 0)
|
||||
throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");
|
||||
|
||||
|
|
|
|||
|
|
@ -64,21 +64,21 @@ public interface Covariate {
|
|||
/**
|
||||
* Converts the internal representation of the key to String format for file output.
|
||||
*
|
||||
* @param key the Long representation of the key
|
||||
* @param key the long representation of the key
|
||||
* @return a string representation of the key
|
||||
*/
|
||||
public String formatKey(final Long key);
|
||||
public String formatKey(final long key);
|
||||
|
||||
/**
|
||||
* Converts an Object key into a Long key using only the lowest numberOfBits() bits
|
||||
* Converts an Object key into a long key using only the lowest numberOfBits() bits
|
||||
*
|
||||
* Only necessary for on-the-fly recalibration when you have the object, but need to store it in memory in long format. For counting covariates
|
||||
* the getValues method already returns all values in Long format.
|
||||
* the getValues method already returns all values in long format.
|
||||
*
|
||||
* @param key the object corresponding to the covariate
|
||||
* @return a long representation of the object
|
||||
*/
|
||||
public Long longFromKey(final Object key);
|
||||
public long longFromKey(final Object key);
|
||||
|
||||
/**
|
||||
* Each covariate should determine how many bits are necessary to encode it's data
|
||||
|
|
|
|||
|
|
@ -12,25 +12,25 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
* @since 2/8/12
|
||||
*/
|
||||
public class CovariateValues {
|
||||
private final Long[] mismatches;
|
||||
private final Long[] insertions;
|
||||
private final Long[] deletions;
|
||||
private final long[] mismatches;
|
||||
private final long[] insertions;
|
||||
private final long[] deletions;
|
||||
|
||||
public CovariateValues(Long[] mismatch, Long[] insertion, Long[] deletion) {
|
||||
public CovariateValues(final long[] mismatch, final long[] insertion, final long[] deletion) {
|
||||
this.mismatches = mismatch;
|
||||
this.insertions = insertion;
|
||||
this.deletions = deletion;
|
||||
}
|
||||
|
||||
public Long[] getMismatches() {
|
||||
public long[] getMismatches() {
|
||||
return mismatches;
|
||||
}
|
||||
|
||||
public Long[] getInsertions() {
|
||||
public long[] getInsertions() {
|
||||
return insertions;
|
||||
}
|
||||
|
||||
public Long[] getDeletions() {
|
||||
public long[] getDeletions() {
|
||||
return deletions;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.BitSetUtils;
|
||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -60,7 +59,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
// Used to pick out the covariate's value from attributes of the read
|
||||
@Override
|
||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||
Long[] cycles = new Long[read.getReadLength()];
|
||||
long[] cycles = new long[read.getReadLength()];
|
||||
final NGSPlatform ngsPlatform = read.getNGSPlatform();
|
||||
|
||||
// Discrete cycle platforms
|
||||
|
|
@ -80,7 +79,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
final int CUSHION = 4;
|
||||
final int MAX_CYCLE = read.getReadLength() - CUSHION - 1;
|
||||
for (int i = 0; i < MAX_CYCLE; i++) {
|
||||
cycles[i] = (i<CUSHION || i>MAX_CYCLE) ? null : keyFromCycle(cycle);
|
||||
cycles[i] = (i<CUSHION || i>MAX_CYCLE) ? -1 : keyFromCycle(cycle);
|
||||
cycle += increment;
|
||||
}
|
||||
}
|
||||
|
|
@ -183,7 +182,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String formatKey(final Long key) {
|
||||
public String formatKey(final long key) {
|
||||
long cycle = key >> 1; // shift so we can remove the "sign" bit
|
||||
if ( (key & 1) != 0 ) // is the last bit set?
|
||||
cycle *= -1; // then the cycle is negative
|
||||
|
|
@ -191,7 +190,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Long longFromKey(final Object key) {
|
||||
public long longFromKey(final Object key) {
|
||||
return (key instanceof String) ? keyFromCycle(Short.parseShort((String) key)) : keyFromCycle((Short) key);
|
||||
}
|
||||
|
||||
|
|
@ -200,7 +199,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
return BQSRKeyManager.numberOfBitsToRepresent(2 * Short.MAX_VALUE); // positive and negative
|
||||
}
|
||||
|
||||
private static Long keyFromCycle(final short cycle) {
|
||||
private static long keyFromCycle(final short cycle) {
|
||||
// no negative values because
|
||||
long result = Math.abs(cycle);
|
||||
result = result << 1; // shift so we can add the "sign" bit
|
||||
|
|
|
|||
|
|
@ -47,9 +47,9 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
|||
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||
int readLength = read.getReadLength();
|
||||
|
||||
Long[] mismatches = new Long[readLength];
|
||||
Long[] insertions = new Long[readLength];
|
||||
Long[] deletions = new Long[readLength];
|
||||
long[] mismatches = new long[readLength];
|
||||
long[] insertions = new long[readLength];
|
||||
long[] deletions = new long[readLength];
|
||||
|
||||
byte[] baseQualities = read.getBaseQualities();
|
||||
byte[] baseInsertionQualities = read.getBaseInsertionQualities();
|
||||
|
|
@ -71,12 +71,12 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String formatKey(final Long key) {
|
||||
public String formatKey(final long key) {
|
||||
return String.format("%d", key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long longFromKey(final Object key) {
|
||||
public long longFromKey(final Object key) {
|
||||
return (key instanceof String) ? (long)Byte.parseByte((String) key) : (long)(Byte) key;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -11,19 +11,23 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
* @since 2/8/12
|
||||
*/
|
||||
public class ReadCovariates {
|
||||
private final Long[][] mismatchesKeySet;
|
||||
private final Long[][] insertionsKeySet;
|
||||
private final Long[][] deletionsKeySet;
|
||||
private final long[][] mismatchesKeySet;
|
||||
private final long[][] insertionsKeySet;
|
||||
private final long[][] deletionsKeySet;
|
||||
|
||||
private int nextCovariateIndex;
|
||||
|
||||
public ReadCovariates(int readLength, int numberOfCovariates) {
|
||||
this.mismatchesKeySet = new Long[readLength][numberOfCovariates];
|
||||
this.insertionsKeySet = new Long[readLength][numberOfCovariates];
|
||||
this.deletionsKeySet = new Long[readLength][numberOfCovariates];
|
||||
this.mismatchesKeySet = new long[readLength][numberOfCovariates];
|
||||
this.insertionsKeySet = new long[readLength][numberOfCovariates];
|
||||
this.deletionsKeySet = new long[readLength][numberOfCovariates];
|
||||
this.nextCovariateIndex = 0;
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
nextCovariateIndex = 0;
|
||||
}
|
||||
|
||||
public void addCovariate(CovariateValues covariate) {
|
||||
transposeCovariateValues(mismatchesKeySet, covariate.getMismatches());
|
||||
transposeCovariateValues(insertionsKeySet, covariate.getInsertions());
|
||||
|
|
@ -31,7 +35,7 @@ public class ReadCovariates {
|
|||
nextCovariateIndex++;
|
||||
}
|
||||
|
||||
public Long[] getKeySet(final int readPosition, final EventType errorModel) {
|
||||
public long[] getKeySet(final int readPosition, final EventType errorModel) {
|
||||
switch (errorModel) {
|
||||
case BASE_SUBSTITUTION:
|
||||
return getMismatchesKeySet(readPosition);
|
||||
|
|
@ -44,19 +48,19 @@ public class ReadCovariates {
|
|||
}
|
||||
}
|
||||
|
||||
public Long[] getMismatchesKeySet(int readPosition) {
|
||||
public long[] getMismatchesKeySet(final int readPosition) {
|
||||
return mismatchesKeySet[readPosition];
|
||||
}
|
||||
|
||||
public Long[] getInsertionsKeySet(int readPosition) {
|
||||
public long[] getInsertionsKeySet(final int readPosition) {
|
||||
return insertionsKeySet[readPosition];
|
||||
}
|
||||
|
||||
public Long[] getDeletionsKeySet(int readPosition) {
|
||||
public long[] getDeletionsKeySet(final int readPosition) {
|
||||
return deletionsKeySet[readPosition];
|
||||
}
|
||||
|
||||
private void transposeCovariateValues(Long[][] keySet, Long[] covariateValues) {
|
||||
private void transposeCovariateValues(final long[][] keySet, final long[] covariateValues) {
|
||||
for (int i = 0; i < covariateValues.length; i++)
|
||||
keySet[i][nextCovariateIndex] = covariateValues[i];
|
||||
}
|
||||
|
|
@ -64,15 +68,15 @@ public class ReadCovariates {
|
|||
/**
|
||||
* Testing routines
|
||||
*/
|
||||
protected Long[][] getMismatchesKeySet() {
|
||||
protected long[][] getMismatchesKeySet() {
|
||||
return mismatchesKeySet;
|
||||
}
|
||||
|
||||
protected Long[][] getInsertionsKeySet() {
|
||||
protected long[][] getInsertionsKeySet() {
|
||||
return insertionsKeySet;
|
||||
}
|
||||
|
||||
protected Long[][] getDeletionsKeySet() {
|
||||
protected long[][] getDeletionsKeySet() {
|
||||
return deletionsKeySet;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
|||
final int l = read.getReadLength();
|
||||
final String readGroupId = readGroupValueFromRG(read.getReadGroup());
|
||||
final long key = keyForReadGroup(readGroupId);
|
||||
Long[] readGroups = new Long[l];
|
||||
long[] readGroups = new long[l];
|
||||
Arrays.fill(readGroups, key);
|
||||
return new CovariateValues(readGroups, readGroups, readGroups);
|
||||
}
|
||||
|
|
@ -65,12 +65,12 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String formatKey(final Long key) {
|
||||
public String formatKey(final long key) {
|
||||
return readGroupReverseLookupTable.get(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long longFromKey(Object key) {
|
||||
public long longFromKey(Object key) {
|
||||
return keyForReadGroup((String) key);
|
||||
}
|
||||
|
||||
|
|
@ -79,7 +79,7 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
|||
return BQSRKeyManager.numberOfBitsToRepresent(Short.MAX_VALUE);
|
||||
}
|
||||
|
||||
private Long keyForReadGroup(final String readGroupId) {
|
||||
private long keyForReadGroup(final String readGroupId) {
|
||||
if (!readGroupLookupTable.containsKey(readGroupId)) {
|
||||
readGroupLookupTable.put(readGroupId, nextId);
|
||||
readGroupReverseLookupTable.put(nextId, readGroupId);
|
||||
|
|
|
|||
|
|
@ -612,15 +612,27 @@ public class RecalDataManager {
|
|||
* @return a matrix with all the covariates calculated for every base in the read
|
||||
*/
|
||||
public static ReadCovariates computeCovariates(final GATKSAMRecord read, final Covariate[] requestedCovariates) {
|
||||
final int numRequestedCovariates = requestedCovariates.length;
|
||||
final int readLength = read.getReadLength();
|
||||
final ReadCovariates readCovariates = new ReadCovariates(readLength, numRequestedCovariates);
|
||||
final ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), requestedCovariates.length);
|
||||
computeCovariates(read, requestedCovariates, readCovariates);
|
||||
return readCovariates;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes all requested covariates for every offset in the given read
|
||||
* by calling covariate.getValues(..).
|
||||
*
|
||||
* It populates an array of covariate values where result[i][j] is the covariate
|
||||
* value for the ith position in the read and the jth covariate in
|
||||
* reqeustedCovariates list.
|
||||
*
|
||||
* @param read The read for which to compute covariate values.
|
||||
* @param requestedCovariates The list of requested covariates.
|
||||
* @param readCovariates The object to store the covariate values
|
||||
*/
|
||||
public static void computeCovariates(final GATKSAMRecord read, final Covariate[] requestedCovariates, final ReadCovariates readCovariates) {
|
||||
// Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
|
||||
for (final Covariate covariate : requestedCovariates)
|
||||
readCovariates.addCovariate(covariate.getValues(read));
|
||||
|
||||
return readCovariates;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -42,7 +42,9 @@ import java.util.*;
|
|||
|
||||
public class BaseRecalibration {
|
||||
private final static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code";
|
||||
private final static String TOO_MANY_KEYS_EXCEPTION = "There should only be one key for the RG collapsed table, something went wrong here";
|
||||
private final static int MAXIMUM_RECALIBRATED_READ_LENGTH = 5000;
|
||||
private final ReadCovariates readCovariates;
|
||||
|
||||
|
||||
private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
|
||||
private final LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
|
||||
|
|
@ -64,6 +66,8 @@ public class BaseRecalibration {
|
|||
quantizationInfo.noQuantization();
|
||||
else if (quantizationLevels > 0 && quantizationLevels != quantizationInfo.getQuantizationLevels()) // any other positive value means, we want a different quantization than the one pre-calculated in the recalibration report. Negative values mean the user did not provide a quantization argument, and just wnats to use what's in the report.
|
||||
quantizationInfo.quantizeQualityScores(quantizationLevels);
|
||||
|
||||
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -77,6 +81,7 @@ public class BaseRecalibration {
|
|||
this.quantizationInfo = quantizationInfo;
|
||||
this.keysAndTablesMap = keysAndTablesMap;
|
||||
this.requestedCovariates = requestedCovariates;
|
||||
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -87,7 +92,8 @@ public class BaseRecalibration {
|
|||
* @param read the read to recalibrate
|
||||
*/
|
||||
public void recalibrateRead(final GATKSAMRecord read) {
|
||||
final ReadCovariates readCovariates = RecalDataManager.computeCovariates(read, requestedCovariates); // compute all covariates for the read
|
||||
readCovariates.reset();
|
||||
RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read
|
||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||
final byte[] quals = read.getBaseQualities(errorModel);
|
||||
|
||||
|
|
@ -95,7 +101,7 @@ public class BaseRecalibration {
|
|||
final byte originalQualityScore = quals[offset];
|
||||
|
||||
if (originalQualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
||||
final Long[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model
|
||||
final long[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model
|
||||
final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
|
||||
quals[offset] = recalibratedQualityScore;
|
||||
}
|
||||
|
|
@ -123,25 +129,23 @@ public class BaseRecalibration {
|
|||
* @param errorModel the event type
|
||||
* @return A recalibrated quality score as a byte
|
||||
*/
|
||||
protected byte performSequentialQualityCalculation(final Long[] key, final EventType errorModel) {
|
||||
protected byte performSequentialQualityCalculation(final long[] key, final EventType errorModel) {
|
||||
|
||||
final byte qualFromRead = (byte)(long)key[1];
|
||||
|
||||
double globalDeltaQ = 0.0;
|
||||
double deltaQReported = 0.0;
|
||||
double deltaQCovariates = 0.0;
|
||||
long masterKey;
|
||||
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> mapEntry : keysAndTablesMap.entrySet()) {
|
||||
final BQSRKeyManager keyManager = mapEntry.getKey();
|
||||
final Map<Long, RecalDatum> table = mapEntry.getValue();
|
||||
|
||||
final Long[] bitKeys = keyManager.longsFromAllKeys(key, errorModel); // calculate the shift in quality due to the read group
|
||||
switch(keyManager.getNumRequiredCovariates()) {
|
||||
case 1: // this is the ReadGroup table
|
||||
if (bitKeys.length > 1)
|
||||
throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION);
|
||||
|
||||
final RecalDatum empiricalQualRG = table.get(bitKeys[0]);
|
||||
masterKey = keyManager.createMasterKey(key, errorModel, -1);
|
||||
final RecalDatum empiricalQualRG = table.get(masterKey);
|
||||
if (empiricalQualRG != null) {
|
||||
final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality();
|
||||
final double aggregrateQReported = empiricalQualRG.getEstimatedQReported();
|
||||
|
|
@ -149,19 +153,19 @@ public class BaseRecalibration {
|
|||
}
|
||||
break;
|
||||
case 2:
|
||||
if (keyManager.getNumOptionalCovariates() == 0) { // this is the QualityScore table
|
||||
if (bitKeys.length > 1)
|
||||
throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION);
|
||||
|
||||
final RecalDatum empiricalQualQS = table.get(bitKeys[0]);
|
||||
final int numOptionalCovariates = keyManager.getNumOptionalCovariates();
|
||||
if (numOptionalCovariates == 0) { // this is the QualityScore table
|
||||
masterKey = keyManager.createMasterKey(key, errorModel, -1);
|
||||
final RecalDatum empiricalQualQS = table.get(masterKey);
|
||||
if (empiricalQualQS != null) {
|
||||
final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality();
|
||||
deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
|
||||
}
|
||||
}
|
||||
else { // this is the table with all the covariates
|
||||
for (final Long k : bitKeys) {
|
||||
final RecalDatum empiricalQualCO = table.get(k);
|
||||
for (int i = 0; i < numOptionalCovariates; i++) {
|
||||
masterKey = keyManager.createMasterKey(key, errorModel, i);
|
||||
final RecalDatum empiricalQualCO = table.get(masterKey);
|
||||
if (empiricalQualCO != null) {
|
||||
final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
|
||||
deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ import org.testng.annotations.BeforeClass;
|
|||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.BitSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -81,7 +80,7 @@ public class BQSRKeyManagerUnitTest {
|
|||
}
|
||||
|
||||
private void runTestOnRead(GATKSAMRecord read, List<Covariate> covariateList, int nRequired) {
|
||||
final Long[][][] covariateKeys = new Long[covariateList.size()][EventType.values().length][];
|
||||
final long[][][] covariateKeys = new long[covariateList.size()][EventType.values().length][];
|
||||
int i = 0;
|
||||
for (Covariate cov : covariateList) {
|
||||
cov.initialize(RAC);
|
||||
|
|
@ -102,13 +101,13 @@ public class BQSRKeyManagerUnitTest {
|
|||
BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
|
||||
|
||||
for (int l = 0; l < read.getReadLength(); l++) {
|
||||
for (int eventType = 0; eventType < EventType.values().length; eventType++) {
|
||||
Long[] keySet = new Long[covariateList.size()];
|
||||
for (EventType eventType : EventType.values()) {
|
||||
long[] keySet = new long[covariateList.size()];
|
||||
Object[] expectedRequired = new Object[covariateList.size()];
|
||||
Object[] expectedCovariate = new Object[covariateList.size()];
|
||||
|
||||
for (int j = 0; j < covariateList.size(); j++) {
|
||||
keySet[j] = covariateKeys[j][eventType][l];
|
||||
keySet[j] = covariateKeys[j][eventType.index][l];
|
||||
|
||||
if (j < nRequired)
|
||||
expectedRequired[j] = covariateList.get(j).formatKey(keySet[j]);
|
||||
|
|
@ -116,28 +115,38 @@ public class BQSRKeyManagerUnitTest {
|
|||
expectedCovariate[j - nRequired] = covariateList.get(j).formatKey(keySet[j]);
|
||||
}
|
||||
|
||||
Long[] hashKeys = keyManager.longsFromAllKeys(keySet, EventType.eventFrom(eventType));
|
||||
short cov = 0;
|
||||
for (Long key : hashKeys) {
|
||||
Object[] actual = keyManager.keySetFrom(key).toArray();
|
||||
|
||||
// Build the expected array
|
||||
Object[] expected = new Object[nRequired + (optionalCovariates.size() > 0 ? 3 : 1)];
|
||||
System.arraycopy(expectedRequired, 0, expected, 0, nRequired);
|
||||
if (optionalCovariates.size() > 0) {
|
||||
expected[expected.length-3] = expectedCovariate[cov];
|
||||
expected[expected.length-2] = optionalCovariates.get(cov++).getClass().getSimpleName().split("Covariate")[0];
|
||||
if (optionalCovariates.size() == 0) {
|
||||
final long masterKey = keyManager.createMasterKey(keySet, eventType, -1);
|
||||
testKeys(keyManager, masterKey, nRequired, optionalCovariates, expectedRequired, expectedCovariate, eventType, -1);
|
||||
} else {
|
||||
for (int j = 0; j < optionalCovariates.size(); j++) {
|
||||
final long masterKey = keyManager.createMasterKey(keySet, eventType, j);
|
||||
testKeys(keyManager, masterKey, nRequired, optionalCovariates, expectedRequired, expectedCovariate, eventType, j);
|
||||
}
|
||||
expected[expected.length-1] = EventType.eventFrom(eventType);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void testKeys(final BQSRKeyManager keyManager, final long key, final int nRequired, final List<Covariate> optionalCovariates,
|
||||
final Object[] expectedRequired, final Object[] expectedCovariate, final EventType eventType, final int index) {
|
||||
|
||||
Object[] actual = keyManager.keySetFrom(key).toArray();
|
||||
|
||||
// Build the expected array
|
||||
Object[] expected = new Object[nRequired + (optionalCovariates.size() > 0 ? 3 : 1)];
|
||||
System.arraycopy(expectedRequired, 0, expected, 0, nRequired);
|
||||
if (optionalCovariates.size() > 0) {
|
||||
expected[expected.length-3] = expectedCovariate[index];
|
||||
expected[expected.length-2] = optionalCovariates.get(index).getClass().getSimpleName().split("Covariate")[0];
|
||||
}
|
||||
expected[expected.length-1] = eventType;
|
||||
|
||||
// System.out.println("Actual : " + Utils.join(",", Arrays.asList(actual)));
|
||||
// System.out.println("Expected: " + Utils.join(",", Arrays.asList(expected)));
|
||||
// System.out.println();
|
||||
|
||||
for (int k = 0; k < expected.length; k++)
|
||||
Assert.assertEquals(actual[k], expected[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int k = 0; k < expected.length; k++)
|
||||
Assert.assertEquals(actual[k], expected[k]);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -84,22 +84,20 @@ public class RecalibrationReportUnitTest {
|
|||
BQSRKeyManager keyManager = entry.getKey();
|
||||
Map<Long, RecalDatum> table = entry.getValue();
|
||||
|
||||
for (Long key : keyManager.longsFromAllKeys(rc.getMismatchesKeySet(offset), EventType.BASE_SUBSTITUTION)) {
|
||||
table.put(key, RecalDatum.createRandomRecalDatum(10000, 10));
|
||||
nKeys++;
|
||||
final int numOptionalCovariates = keyManager.getNumOptionalCovariates();
|
||||
if (numOptionalCovariates == 0) {
|
||||
table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_SUBSTITUTION, -1), RecalDatum.createRandomRecalDatum(10000, 10));
|
||||
table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_INSERTION, -1), RecalDatum.createRandomRecalDatum(100000, 10));
|
||||
table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_DELETION, -1), RecalDatum.createRandomRecalDatum(100000, 10));
|
||||
nKeys += 3;
|
||||
} else {
|
||||
for (int j = 0; j < numOptionalCovariates; j++) {
|
||||
table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_SUBSTITUTION, j), RecalDatum.createRandomRecalDatum(10000, 10));
|
||||
table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_INSERTION, j), RecalDatum.createRandomRecalDatum(100000, 10));
|
||||
table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_DELETION, j), RecalDatum.createRandomRecalDatum(100000, 10));
|
||||
nKeys += 3;
|
||||
}
|
||||
}
|
||||
|
||||
for (Long key : keyManager.longsFromAllKeys(rc.getInsertionsKeySet(offset), EventType.BASE_INSERTION)) {
|
||||
table.put(key, RecalDatum.createRandomRecalDatum(100000, 10));
|
||||
nKeys++;
|
||||
}
|
||||
|
||||
|
||||
for (Long key : keyManager.longsFromAllKeys(rc.getDeletionsKeySet(offset), EventType.BASE_DELETION)) {
|
||||
table.put(key, RecalDatum.createRandomRecalDatum(100000, 10));
|
||||
nKeys++;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
Assert.assertEquals(nKeys, expectedKeys);
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ public class BaseRecalibrationUnitTest {
|
|||
readCovariates = RecalDataManager.computeCovariates(read, requestedCovariates);
|
||||
|
||||
for (int i=0; i<read.getReadLength(); i++) {
|
||||
Long[] bitKeys = readCovariates.getMismatchesKeySet(i);
|
||||
long[] bitKeys = readCovariates.getMismatchesKeySet(i);
|
||||
|
||||
Object[] objKey = buildObjectKey(bitKeys);
|
||||
|
||||
|
|
@ -98,9 +98,17 @@ public class BaseRecalibrationUnitTest {
|
|||
|
||||
RecalDatum newDatum = new RecalDatum(nObservations, nErrors, estimatedQReported, empiricalQuality);
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> mapEntry : keysAndTablesMap.entrySet()) {
|
||||
Long[] keys = mapEntry.getKey().longsFromAllKeys(bitKeys, EventType.BASE_SUBSTITUTION);
|
||||
for (Long key : keys)
|
||||
updateCovariateWithKeySet(mapEntry.getValue(), key, newDatum);
|
||||
final BQSRKeyManager keyManager = mapEntry.getKey();
|
||||
final int numOptionalCovariates = keyManager.getNumOptionalCovariates();
|
||||
if (numOptionalCovariates == 0) {
|
||||
final long masterKey = keyManager.createMasterKey(bitKeys, EventType.BASE_SUBSTITUTION, -1);
|
||||
updateCovariateWithKeySet(mapEntry.getValue(), masterKey, newDatum);
|
||||
} else {
|
||||
for (int j = 0; j < numOptionalCovariates; j++) {
|
||||
final long masterKey = keyManager.createMasterKey(bitKeys, EventType.BASE_SUBSTITUTION, j);
|
||||
updateCovariateWithKeySet(mapEntry.getValue(), masterKey, newDatum);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||
|
|
@ -122,7 +130,7 @@ public class BaseRecalibrationUnitTest {
|
|||
public void testGoldStandardComparison() {
|
||||
debugTables();
|
||||
for (int i = 0; i < read.getReadLength(); i++) {
|
||||
Long [] bitKey = readCovariates.getKeySet(i, EventType.BASE_SUBSTITUTION);
|
||||
long [] bitKey = readCovariates.getKeySet(i, EventType.BASE_SUBSTITUTION);
|
||||
Object [] objKey = buildObjectKey(bitKey);
|
||||
byte v2 = baseRecalibration.performSequentialQualityCalculation(bitKey, EventType.BASE_SUBSTITUTION);
|
||||
byte v1 = goldStandardSequentialCalculation(objKey);
|
||||
|
|
@ -130,7 +138,7 @@ public class BaseRecalibrationUnitTest {
|
|||
}
|
||||
}
|
||||
|
||||
private Object[] buildObjectKey(Long[] bitKey) {
|
||||
private Object[] buildObjectKey(long[] bitKey) {
|
||||
Object[] key = new Object[bitKey.length];
|
||||
key[0] = rgCovariate.formatKey(bitKey[0]);
|
||||
key[1] = qsCovariate.formatKey(bitKey[1]);
|
||||
|
|
|
|||
Loading…
Reference in New Issue