NestedHashMap-based implementation of BQSRv2 along with a few minor optimizations. Not a huge runtime upgrade over the long bitset approach, but it allows us to implement further optimizations going forward. Integration test change because the original version had a bug in the quantized qual table creation.
This commit is contained in:
parent
21d5e8a252
commit
1fafd9f6c8
|
|
@ -71,11 +71,13 @@ public class BQSRGatherer extends Gatherer {
|
|||
if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) {
|
||||
File recal_out = new File(output.getName() + ".original");
|
||||
RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport);
|
||||
RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getKeysAndTablesMap(), generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES);
|
||||
// TODO -- fix me
|
||||
//RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getKeysAndTablesMap(), generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES);
|
||||
}
|
||||
else if (!RAC.NO_PLOTS) {
|
||||
File recal_out = new File(output.getName() + ".recal");
|
||||
RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES);
|
||||
// TODO -- fix me
|
||||
//RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES);
|
||||
}
|
||||
|
||||
generalReport.output(outputFile);
|
||||
|
|
|
|||
|
|
@ -1,329 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* This class provides all the functionality for the BitSet representation of the keys to the hash table of BQSR
|
||||
*
|
||||
* It also handles the event type "covariate" which is not exactly a covariate, but is added as a key to the hashmap. The Key Manager will
|
||||
* add the event type as a bitset to the end of the covariate bitset key. This way, it won't get int the way of masking the information
|
||||
* out of the key for the actual covariates, and having the covariates handle it. The key manager handles the event type.
|
||||
*
|
||||
* The keys represented by this key manager will always have the same order:
|
||||
*
|
||||
* RequiredCovariate1, RequiredCovariate2, ..., RequiredCovariateN, OptionalCovariate1, OptionalCovariateID, EventType
|
||||
* RequiredCovariate1, RequiredCovariate2, ..., RequiredCovariateN, OptionalCovariate2, OptionalCovariateID, EventType
|
||||
* ...
|
||||
* RequiredCovariate1, RequiredCovariate2, ..., RequiredCovariateN, OptionalCovariateN, OptionalCovariateID, EventType
|
||||
*
|
||||
*
|
||||
* Note that Optional Covariates are optional, and the Key Manager should operate without them if necessary.
|
||||
*
|
||||
* @author Mauricio Carneiro
|
||||
* @since 3/6/12
|
||||
*/
|
||||
public class BQSRKeyManager {
|
||||
|
||||
private final Covariate[] requiredCovariates;
|
||||
private final Covariate[] optionalCovariates;
|
||||
private final RequiredCovariateInfo[] requiredCovariatesInfo;
|
||||
private final OptionalCovariateInfo[] optionalCovariatesInfo;
|
||||
private final Map<String, Short> covariateNameToIDMap;
|
||||
|
||||
private int nRequiredBits; // Number of bits used to represent the required covariates
|
||||
|
||||
private final int optionalCovariateOffset;
|
||||
private final int optionalCovariateIDOffset;
|
||||
|
||||
private final long optionalCovariateMask; // Standard mask for optional covariates key
|
||||
private final long optionalCovariateIDMask; // Standard mask for optional covariates order key
|
||||
private final long eventIDMask; // Standard mask for event ID
|
||||
|
||||
/**
|
||||
* Initializes the KeyManager with the total number of covariates to use
|
||||
*
|
||||
* @param requiredCovariates the ordered list of required covariates
|
||||
* @param optionalCovariates the ordered list of optional covariates
|
||||
*/
|
||||
public BQSRKeyManager(final List<Covariate> requiredCovariates, final List<Covariate> optionalCovariates) {
|
||||
this.requiredCovariates = new Covariate[requiredCovariates.size()];
|
||||
this.optionalCovariates = new Covariate[optionalCovariates.size()];
|
||||
requiredCovariatesInfo = new RequiredCovariateInfo[requiredCovariates.size()]; // initialize the required covariates list
|
||||
optionalCovariatesInfo = new OptionalCovariateInfo[optionalCovariates.size()]; // initialize the optional covariates list (size may be 0, it's okay)
|
||||
covariateNameToIDMap = new HashMap<String, Short>(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates)
|
||||
|
||||
nRequiredBits = 0;
|
||||
for (int i = 0; i < requiredCovariates.size(); i++) { // create a list of required covariates with the extra information for key management
|
||||
final Covariate required = requiredCovariates.get(i);
|
||||
final int nBits = required.numberOfBits(); // number of bits used by this covariate
|
||||
final long mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
|
||||
this.requiredCovariates[i] = required;
|
||||
requiredCovariatesInfo[i] = new RequiredCovariateInfo(nBits, nRequiredBits, mask, required); // Create an object for this required covariate
|
||||
nRequiredBits += nBits;
|
||||
}
|
||||
|
||||
final int bitsInEventType = numberOfBitsToRepresent(EventType.values().length);
|
||||
eventIDMask = genericMask(nRequiredBits, bitsInEventType);
|
||||
|
||||
short id = 0;
|
||||
int nOptionalBits = 0;
|
||||
for (int i = 0; i < optionalCovariates.size(); i++) {
|
||||
final Covariate optional = optionalCovariates.get(i);
|
||||
nOptionalBits = Math.max(nOptionalBits, optional.numberOfBits()); // optional covariates are represented by the number of bits needed by biggest covariate
|
||||
this.optionalCovariates[i] = optional;
|
||||
optionalCovariatesInfo[i] = new OptionalCovariateInfo(id, optional);
|
||||
final String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
|
||||
covariateNameToIDMap.put(covariateName, id);
|
||||
id++;
|
||||
}
|
||||
|
||||
optionalCovariateOffset = nRequiredBits + bitsInEventType;
|
||||
optionalCovariateMask = genericMask(optionalCovariateOffset, nOptionalBits); // the generic mask to extract optional covariate bits from the combined bitset
|
||||
optionalCovariateIDOffset = nRequiredBits + bitsInEventType + nOptionalBits;
|
||||
final int nOptionalIDBits = numberOfBitsToRepresent(optionalCovariates.size()); // number of bits used to represent the covariate ID
|
||||
optionalCovariateIDMask = genericMask(optionalCovariateIDOffset, nOptionalIDBits); // the generic mask to extract optional covariate ID bits from the combined bitset
|
||||
|
||||
final int totalNumberOfBits = optionalCovariateIDOffset + nOptionalIDBits; // total number of bits used in the final key
|
||||
if ( totalNumberOfBits > 64 )
|
||||
throw new UserException.BadInput("The total number of bits used for the master BQSR key is greater than 64 and cannot be represented in a long");
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates one key given the optional covariate (or none if it is null)
|
||||
*
|
||||
* Keys include all required covariates, the standard covariate and the event type.
|
||||
*
|
||||
* @param allKeys The keys in long representation for each covariate (includes all optional covariates, not just the one requested)
|
||||
* @param eventType The type of event described by this keyset (e.g. mismatches, insertions, deletions)
|
||||
* @return one key in long representation (non-negative) or -1 for a bad key
|
||||
*/
|
||||
public long createMasterKey(final long[] allKeys, final EventType eventType, final int optionalCovariateIndex) {
|
||||
|
||||
int keyIndex = 0;
|
||||
long masterKey = 0L; // This will be a master key holding all the required keys, to replicate later on
|
||||
for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo)
|
||||
masterKey |= (allKeys[keyIndex++] << infoRequired.offset);
|
||||
|
||||
final long eventKey = keyFromEvent(eventType); // create a key for the event type
|
||||
masterKey |= (eventKey << nRequiredBits);
|
||||
|
||||
if (optionalCovariateIndex >= 0 && optionalCovariateIndex < optionalCovariates.length) {
|
||||
final long covariateKey = allKeys[keyIndex + optionalCovariateIndex];
|
||||
if (covariateKey < 0) // do not add "nulls" to the final set of keys
|
||||
return -1;
|
||||
|
||||
masterKey |= (covariateKey << optionalCovariateOffset);
|
||||
masterKey |= (optionalCovariatesInfo[optionalCovariateIndex].covariateID << optionalCovariateIDOffset);
|
||||
}
|
||||
|
||||
return masterKey;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates one key for the covariates represented in Object[] key
|
||||
*
|
||||
* The covariates will have the actual objects produced by the covariates (probably read from the recalibration data file)
|
||||
* and will contain all required covariates and one (or none) optional covariates. Therefore, the product is one key, not many.
|
||||
*
|
||||
* Example key:
|
||||
* RG, QUAL, CYCLE, CYCLE_ID, EventType
|
||||
*
|
||||
* @param key list of objects produced by the required covariates followed by one or zero optional covariates.
|
||||
* @return a key representing these objects.
|
||||
*/
|
||||
public long longFromKey(Object[] key) {
|
||||
int requiredCovariate = 0;
|
||||
long masterKey = 0L; // This will be a master key holding all the required keys, to replicate later on
|
||||
for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo)
|
||||
masterKey |= (infoRequired.covariate.longFromKey(key[requiredCovariate++]) << infoRequired.offset);
|
||||
|
||||
final int eventIndex = key.length - 1; // the event type is always the last key
|
||||
final long eventKey = keyFromEvent((EventType) key[eventIndex]); // create a key for the event type
|
||||
masterKey |= (eventKey << nRequiredBits);
|
||||
|
||||
if (optionalCovariatesInfo.length > 0) {
|
||||
final int covariateIndex = requiredCovariatesInfo.length; // the optional covariate index in the key array
|
||||
final int covariateIDIndex = covariateIndex + 1; // the optional covariate ID index is right after the optional covariate's
|
||||
final short covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index
|
||||
final OptionalCovariateInfo infoOptional = optionalCovariatesInfo[covariateID]; // so we can get the optional covariate information
|
||||
|
||||
final long covariateKey = infoOptional.covariate.longFromKey(key[covariateIndex]); // convert the optional covariate key into a bitset using the covariate's interface
|
||||
masterKey |= (covariateKey << optionalCovariateOffset);
|
||||
masterKey |= (infoOptional.covariateID << optionalCovariateIDOffset);
|
||||
}
|
||||
|
||||
return masterKey;
|
||||
}
|
||||
|
||||
/**
|
||||
* Covariate id can be either the covariate name (String) or the actual id (short). This method
|
||||
* finds it's type and converts accordingly to the short notation.
|
||||
*
|
||||
* @param id the string or short representation of the optional covariate id
|
||||
* @return the short representation of the optional covariate id.
|
||||
*/
|
||||
private short parseCovariateID(final Object id) {
|
||||
return (id instanceof String) ? covariateNameToIDMap.get(id.toString()) : (Short) id;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a key set of objects from a combined master key.
|
||||
*
|
||||
* Masks out each covariate independently and decodes their values (Object) into a keyset
|
||||
*
|
||||
* @param master the master representation of the keys
|
||||
* @return an object array with the values for each key
|
||||
*/
|
||||
public List<Object> keySetFrom(final long master) {
|
||||
final List<Object> objectKeys = new ArrayList<Object>();
|
||||
for (RequiredCovariateInfo info : requiredCovariatesInfo) {
|
||||
final long covariateKey = extractKeyFromMaster(master, info.mask, info.offset); // get the covariate's key
|
||||
objectKeys.add(info.covariate.formatKey(covariateKey)); // convert the key to object using covariate's interface
|
||||
}
|
||||
|
||||
if (optionalCovariatesInfo.length > 0) {
|
||||
final long covKey = extractKeyFromMaster(master, optionalCovariateMask, optionalCovariateOffset); // get the covariate's key
|
||||
final int covIDKey = (int)extractKeyFromMaster(master, optionalCovariateIDMask, optionalCovariateIDOffset); // get the covariate's id (to identify which covariate this is)
|
||||
Covariate covariate = optionalCovariatesInfo[(short)covIDKey].covariate; // get the corresponding optional covariate object
|
||||
objectKeys.add(covariate.formatKey(covKey)); // add the optional covariate key to the key set
|
||||
objectKeys.add(covariate.getClass().getSimpleName().split("Covariate")[0]); // add the covariate name using the id
|
||||
}
|
||||
|
||||
objectKeys.add(EventType.eventFrom((int)extractKeyFromMaster(master, eventIDMask, nRequiredBits))); // add the event type object to the key set
|
||||
|
||||
return objectKeys;
|
||||
}
|
||||
|
||||
public Covariate[] getRequiredCovariates() {
|
||||
return requiredCovariates;
|
||||
}
|
||||
|
||||
public Covariate[] getOptionalCovariates() {
|
||||
return optionalCovariates;
|
||||
}
|
||||
|
||||
public int getNumRequiredCovariates() {
|
||||
return requiredCovariates.length;
|
||||
}
|
||||
|
||||
public int getNumOptionalCovariates() {
|
||||
return optionalCovariates.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a mask for the requested covariate to extract the relevant key from a combined master key
|
||||
*
|
||||
* @param offset the offset into the master key
|
||||
* @param nBits the number of bits needed by the Covariate to represent its values
|
||||
* @return the mask relevant to the covariate
|
||||
*/
|
||||
private long genericMask(final int offset, final int nBits) {
|
||||
long mask = 0L;
|
||||
for ( int i = 0; i < nBits; i++ )
|
||||
mask |= 1L << (offset+i);
|
||||
return mask;
|
||||
}
|
||||
|
||||
private long extractKeyFromMaster(final long master, final long mask, final int offset) {
|
||||
long key = master & mask;
|
||||
return key >> offset;
|
||||
}
|
||||
|
||||
// cache the key representing an event since it's otherwise created a massive amount of times
|
||||
private static final long[] eventTypeCache = new long[EventType.values().length]; // event IDs must be longs so that bit-fiddling works
|
||||
static {
|
||||
for (final EventType eventType : EventType.values())
|
||||
eventTypeCache[eventType.index] = (long)eventType.index;
|
||||
}
|
||||
|
||||
private long keyFromEvent(final EventType eventType) {
|
||||
return eventTypeCache[eventType.index];
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (!(o instanceof BQSRKeyManager))
|
||||
return false;
|
||||
|
||||
BQSRKeyManager other = (BQSRKeyManager) o;
|
||||
if (this == other)
|
||||
return true;
|
||||
|
||||
if (requiredCovariatesInfo.length != other.requiredCovariatesInfo.length ||
|
||||
optionalCovariatesInfo.length != other.optionalCovariatesInfo.length)
|
||||
return false;
|
||||
|
||||
for (int i = 0; i < requiredCovariates.length; i++) {
|
||||
Covariate myRequiredCovariate = requiredCovariates[i];
|
||||
Covariate otherRequiredCovariate = other.requiredCovariates[i];
|
||||
String thisName = myRequiredCovariate.getClass().getSimpleName();
|
||||
String otherName = otherRequiredCovariate.getClass().getSimpleName();
|
||||
if (!thisName.equals(otherName))
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < optionalCovariates.length; i++) {
|
||||
Covariate myOptionalCovariate = optionalCovariates[i];
|
||||
Covariate otherOptionalCovariate = other.optionalCovariates[i];
|
||||
String thisName = myOptionalCovariate.getClass().getSimpleName();
|
||||
String otherName = otherOptionalCovariate.getClass().getSimpleName();
|
||||
if (!thisName.equals(otherName))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the number of bits necessary to represent a given number of elements
|
||||
*
|
||||
* @param numberOfElements the number of elements to represent (must be positive)
|
||||
* @return the number of bits necessary to represent this many elements
|
||||
*/
|
||||
public static int numberOfBitsToRepresent(long numberOfElements) {
|
||||
if (numberOfElements < 0)
|
||||
throw new ReviewedStingException("Number of elements must be positive: " + numberOfElements);
|
||||
|
||||
if (numberOfElements == 1L)
|
||||
return 1; // special case
|
||||
|
||||
int n = 0;
|
||||
numberOfElements--;
|
||||
while (numberOfElements > 0) {
|
||||
numberOfElements = numberOfElements >> 1;
|
||||
n++;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
/**
|
||||
* Aggregate information for each Covariate
|
||||
*/
|
||||
private static class RequiredCovariateInfo {
|
||||
public final int nBits; // number of bits for this key
|
||||
public final int offset; // the offset into the master key
|
||||
public final long mask; // the mask to pull out this covariate from the combined bitset key ( a mask made from bitsBefore and nBits )
|
||||
public final Covariate covariate; // this allows reverse lookup of the Covariates in order
|
||||
|
||||
RequiredCovariateInfo(final int nBits, final int offset, final long mask, final Covariate covariate) {
|
||||
this.nBits = nBits;
|
||||
this.offset = offset;
|
||||
this.mask = mask;
|
||||
this.covariate = covariate;
|
||||
}
|
||||
}
|
||||
|
||||
private static class OptionalCovariateInfo {
|
||||
public final long covariateID; // cache the covariate ID (must be a long so that bit-fiddling works)
|
||||
public final Covariate covariate;
|
||||
|
||||
OptionalCovariateInfo(final long covariateID, final Covariate covariate) {
|
||||
this.covariateID = covariateID;
|
||||
this.covariate = covariate;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -43,6 +43,9 @@ public class ContextCovariate implements StandardCovariate {
|
|||
private int mismatchesContextSize;
|
||||
private int indelsContextSize;
|
||||
|
||||
// the maximum context size (number of bases) permitted; we need to keep the leftmost base free so that values are
|
||||
// not negative and we reserve 4 more bits to represent the length of the context; it takes 2 bits to encode one base.
|
||||
static final private int MAX_DNA_CONTEXT = 13;
|
||||
private byte LOW_QUAL_TAIL;
|
||||
|
||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
|
|
@ -64,6 +67,7 @@ public class ContextCovariate implements StandardCovariate {
|
|||
@Override
|
||||
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||
|
||||
// TODO -- wrong: fix me
|
||||
final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context
|
||||
|
||||
final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag();
|
||||
|
|
@ -73,7 +77,7 @@ public class ContextCovariate implements StandardCovariate {
|
|||
|
||||
final int readLength = clippedRead.getReadLength();
|
||||
for (int i = 0; i < readLength; i++) {
|
||||
final long indelKey = contextWith(bases, i, indelsContextSize);
|
||||
final int indelKey = contextWith(bases, i, indelsContextSize);
|
||||
values.addCovariate(contextWith(bases, i, mismatchesContextSize), indelKey, indelKey, (negativeStrand ? readLength - i - 1 : i));
|
||||
}
|
||||
}
|
||||
|
|
@ -85,7 +89,7 @@ public class ContextCovariate implements StandardCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String formatKey(final long key) {
|
||||
public String formatKey(final int key) {
|
||||
if (key == -1) // this can only happen in test routines because we do not propagate null keys to the csv file
|
||||
return null;
|
||||
|
||||
|
|
@ -93,13 +97,8 @@ public class ContextCovariate implements StandardCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public long longFromKey(Object key) {
|
||||
return keyFromContext((String) key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numberOfBits() {
|
||||
return Integer.bitCount(Integer.MAX_VALUE);
|
||||
public int keyFromValue(final Object value) {
|
||||
return keyFromContext((String) value);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -110,130 +109,61 @@ public class ContextCovariate implements StandardCovariate {
|
|||
* @param contextSize context size to use building the context
|
||||
* @return the key representing the context
|
||||
*/
|
||||
private long contextWith(final byte[] bases, final int offset, final int contextSize) {
|
||||
private int contextWith(final byte[] bases, final int offset, final int contextSize) {
|
||||
final int start = offset - contextSize + 1;
|
||||
final long result;
|
||||
if (start >= 0)
|
||||
result = keyFromContext(bases, start, offset + 1);
|
||||
else
|
||||
result = -1L;
|
||||
return result;
|
||||
return (start >= 0) ? keyFromContext(bases, start, offset + 1) : -1;
|
||||
}
|
||||
|
||||
public static long keyFromContext(final String dna) {
|
||||
public static int keyFromContext(final String dna) {
|
||||
return keyFromContext(dna.getBytes(), 0, dna.length());
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a long representation of a given dna string.
|
||||
* Creates a int representation of a given dna string.
|
||||
*
|
||||
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
|
||||
* be longer than 31 bases.
|
||||
*
|
||||
* The bit representation of a dna string is the simple:
|
||||
* 0 A 4 AA 8 CA
|
||||
* 1 C 5 AC ...
|
||||
* 2 G 6 AG 1343 TTGGT
|
||||
* 3 T 7 AT 1364 TTTTT
|
||||
*
|
||||
* To convert from dna to number, we convert the dna string to base10 and add all combinations that
|
||||
* preceded the string (with smaller lengths).
|
||||
*
|
||||
* @param dna the dna sequence
|
||||
* @param dna the dna sequence
|
||||
* @param start the start position in the byte array (inclusive)
|
||||
* @param end the end position in the array (exclusive)
|
||||
* @return the key representing the dna sequence
|
||||
*/
|
||||
public static long keyFromContext(final byte[] dna, final int start, final int end) {
|
||||
final long preContext = combinationsPerLength[end - start - 1]; // the sum of all combinations that preceded the length of the dna string
|
||||
long baseTen = 0L; // the number in base_10 that we are going to use to generate the bit set
|
||||
public static int keyFromContext(final byte[] dna, final int start, final int end) {
|
||||
|
||||
// TODO -- bit fiddle to ge this all working in a single call to the method (mask out length, shift, OR length back in)
|
||||
|
||||
int key = end - start;
|
||||
int bitOffset = 4;
|
||||
for (int i = start; i < end; i++) {
|
||||
baseTen = (baseTen << 2); // multiply by 4
|
||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(dna[i]);
|
||||
if (baseIndex == -1) // ignore non-ACGT bases
|
||||
return -1L;
|
||||
baseTen += (long)baseIndex;
|
||||
return -1;
|
||||
key |= (baseIndex << bitOffset);
|
||||
bitOffset += 2;
|
||||
}
|
||||
return baseTen + preContext; // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
|
||||
}
|
||||
|
||||
static final private int MAX_DNA_CONTEXT = 31; // the maximum context size (number of bases) permitted in the "long bitset" implementation of the DNA <=> BitSet conversion.
|
||||
static final long[] combinationsPerLength = new long[MAX_DNA_CONTEXT + 1]; // keeps the memoized table with the number of combinations for each given DNA context length
|
||||
static {
|
||||
for (int i = 0; i < MAX_DNA_CONTEXT + 1; i++)
|
||||
computeCombinationsFor(i);
|
||||
}
|
||||
|
||||
/**
|
||||
* The sum of all combinations of a context of a given length from length = 0 to length.
|
||||
*
|
||||
* Memoized implementation of sum(4^i) , where i=[0,length]
|
||||
*
|
||||
* @param length the length of the DNA context
|
||||
*/
|
||||
private static void computeCombinationsFor(final int length) {
|
||||
long combinations = 0L;
|
||||
for (int i = 1; i <= length; i++)
|
||||
combinations += (1L << 2 * i); // add all combinations with 4^i ( 4^i is the same as 2^(2*i) )
|
||||
combinationsPerLength[length] = combinations;
|
||||
return key;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a key into the dna string representation.
|
||||
*
|
||||
* Warning: This conversion is limited to long precision, therefore the dna sequence cannot
|
||||
* be longer than 31 bases.
|
||||
*
|
||||
* We calculate the length of the resulting DNA sequence by looking at the sum(4^i) that exceeds the
|
||||
* base_10 representation of the sequence. This is important for us to know how to bring the number
|
||||
* to a quasi-canonical base_4 representation, and to fill in leading A's (since A's are represented
|
||||
* as 0's and leading 0's are omitted).
|
||||
*
|
||||
* quasi-canonical because A is represented by a 0, therefore,
|
||||
* instead of : 0, 1, 2, 3, 10, 11, 12, ...
|
||||
* we have : 0, 1, 2, 3, 00, 01, 02, ...
|
||||
*
|
||||
* but we can correctly decode it because we know the final length.
|
||||
*
|
||||
* @param key the key representing the dna sequence
|
||||
* @return the dna sequence represented by the key
|
||||
*/
|
||||
public static String contextFromKey(long key) {
|
||||
public static String contextFromKey(final int key) {
|
||||
if (key < 0)
|
||||
throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");
|
||||
|
||||
final int length = contextLengthFor(key); // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls)
|
||||
key -= combinationsPerLength[length - 1]; // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
|
||||
final int length = key & 15; // the first 4 bits represent the length (in bp) of the context
|
||||
int mask = 48; // use the mask to pull out bases
|
||||
int offset = 4;
|
||||
|
||||
StringBuilder dna = new StringBuilder();
|
||||
while (key > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical)
|
||||
final byte base = (byte) (key & 3); // equivalent to (key % 4)
|
||||
dna.append((char)BaseUtils.baseIndexToSimpleBase(base));
|
||||
key = key >> 2; // divide by 4
|
||||
for (int i = 0; i < length; i++) {
|
||||
final int baseIndex = (key & mask) >> offset;
|
||||
dna.append((char)BaseUtils.baseIndexToSimpleBase(baseIndex));
|
||||
mask = mask << 2; // move the mask over to the next 2 bits
|
||||
offset += 2;
|
||||
}
|
||||
for (int j = dna.length(); j < length; j++)
|
||||
dna.append('A'); // add leading A's as necessary (due to the "quasi" canonical status, see description above)
|
||||
|
||||
return dna.reverse().toString(); // make sure to reverse the string since we should have been pre-pending all along
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the length of the DNA context for a given base 10 number
|
||||
*
|
||||
* It is important to know the length given the base 10 number to calculate the number of combinations
|
||||
* and to disambiguate the "quasi-canonical" state.
|
||||
*
|
||||
* This method also calculates the number of combinations as a by-product, but since it memoizes the
|
||||
* results, a subsequent call to combinationsFor(length) is O(1).
|
||||
*
|
||||
* @param number the base 10 representation of the key
|
||||
* @return the length of the DNA context represented by this number
|
||||
*/
|
||||
private static int contextLengthFor(final long number) {
|
||||
int length = 1; // the calculated length of the DNA sequence given the base_10 representation of its BitSet.
|
||||
long combinations = combinationsPerLength[length]; // the next context (we advance it so we know which one was preceding it).
|
||||
while (combinations <= number) { // find the length of the dna string (length)
|
||||
length++;
|
||||
combinations = combinationsPerLength[length]; // calculate the next context
|
||||
}
|
||||
return length;
|
||||
return dna.toString();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ public interface Covariate {
|
|||
* @param key the long representation of the key
|
||||
* @return a string representation of the key
|
||||
*/
|
||||
public String formatKey(final long key);
|
||||
public String formatKey(final int key);
|
||||
|
||||
/**
|
||||
* Converts an Object key into a long key using only the lowest numberOfBits() bits
|
||||
|
|
@ -75,18 +75,10 @@ public interface Covariate {
|
|||
* Only necessary for on-the-fly recalibration when you have the object, but need to store it in memory in long format. For counting covariates
|
||||
* the getValues method already returns all values in long format.
|
||||
*
|
||||
* @param key the object corresponding to the covariate
|
||||
* @param value the object corresponding to the covariate
|
||||
* @return a long representation of the object
|
||||
*/
|
||||
public long longFromKey(final Object key);
|
||||
|
||||
/**
|
||||
* Each covariate should determine how many bits are necessary to encode it's data
|
||||
*
|
||||
* @return The number of bits used to represent the values of this covariate.
|
||||
*/
|
||||
public int numberOfBits();
|
||||
|
||||
public int keyFromValue(final Object value);
|
||||
}
|
||||
|
||||
interface RequiredCovariate extends Covariate {}
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
final int CUSHION = 4;
|
||||
final int MAX_CYCLE = readLength - CUSHION - 1;
|
||||
for (int i = 0; i < readLength; i++) {
|
||||
final long key = (i<CUSHION || i>MAX_CYCLE) ? -1L : keyFromCycle(cycle);
|
||||
final int key = (i<CUSHION || i>MAX_CYCLE) ? -1 : keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, i);
|
||||
cycle += increment;
|
||||
}
|
||||
|
|
@ -106,22 +106,22 @@ public class CycleCovariate implements StandardCovariate {
|
|||
int iii = 0;
|
||||
while (iii < readLength) {
|
||||
while (iii < readLength && bases[iii] == (byte) 'T') {
|
||||
final long key = keyFromCycle(cycle);
|
||||
final int key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii++;
|
||||
}
|
||||
while (iii < readLength && bases[iii] == (byte) 'A') {
|
||||
final long key = keyFromCycle(cycle);
|
||||
final int key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii++;
|
||||
}
|
||||
while (iii < readLength && bases[iii] == (byte) 'C') {
|
||||
final long key = keyFromCycle(cycle);
|
||||
final int key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii++;
|
||||
}
|
||||
while (iii < readLength && bases[iii] == (byte) 'G') {
|
||||
final long key = keyFromCycle(cycle);
|
||||
final int key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii++;
|
||||
}
|
||||
|
|
@ -132,7 +132,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
cycle++;
|
||||
}
|
||||
if (iii < readLength && !BaseUtils.isRegularBase(bases[iii])) {
|
||||
final long key = keyFromCycle(cycle);
|
||||
final int key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii++;
|
||||
}
|
||||
|
|
@ -143,22 +143,22 @@ public class CycleCovariate implements StandardCovariate {
|
|||
int iii = readLength - 1;
|
||||
while (iii >= 0) {
|
||||
while (iii >= 0 && bases[iii] == (byte) 'T') {
|
||||
final long key = keyFromCycle(cycle);
|
||||
final int key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii--;
|
||||
}
|
||||
while (iii >= 0 && bases[iii] == (byte) 'A') {
|
||||
final long key = keyFromCycle(cycle);
|
||||
final int key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii--;
|
||||
}
|
||||
while (iii >= 0 && bases[iii] == (byte) 'C') {
|
||||
final long key = keyFromCycle(cycle);
|
||||
final int key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii--;
|
||||
}
|
||||
while (iii >= 0 && bases[iii] == (byte) 'G') {
|
||||
final long key = keyFromCycle(cycle);
|
||||
final int key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii--;
|
||||
}
|
||||
|
|
@ -169,7 +169,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
cycle++;
|
||||
}
|
||||
if (iii >= 0 && !BaseUtils.isRegularBase(bases[iii])) {
|
||||
final long key = keyFromCycle(cycle);
|
||||
final int key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii--;
|
||||
}
|
||||
|
|
@ -190,26 +190,21 @@ public class CycleCovariate implements StandardCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String formatKey(final long key) {
|
||||
long cycle = key >> 1; // shift so we can remove the "sign" bit
|
||||
public String formatKey(final int key) {
|
||||
int cycle = key >> 1; // shift so we can remove the "sign" bit
|
||||
if ( (key & 1) != 0 ) // is the last bit set?
|
||||
cycle *= -1; // then the cycle is negative
|
||||
return String.format("%d", cycle);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long longFromKey(final Object key) {
|
||||
return (key instanceof String) ? keyFromCycle(Integer.parseInt((String) key)) : keyFromCycle((Integer) key);
|
||||
public int keyFromValue(final Object value) {
|
||||
return (value instanceof String) ? keyFromCycle(Integer.parseInt((String) value)) : keyFromCycle((Integer) value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numberOfBits() {
|
||||
return Integer.bitCount(Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
private static long keyFromCycle(final int cycle) {
|
||||
private static int keyFromCycle(final int cycle) {
|
||||
// no negative values because values must fit into the first few bits of the long
|
||||
long result = Math.abs(cycle);
|
||||
int result = Math.abs(cycle);
|
||||
result = result << 1; // shift so we can add the "sign" bit
|
||||
if ( cycle < 0 )
|
||||
result++; // negative cycles get the lower-most bit set
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/*
|
||||
|
|
@ -49,7 +48,7 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
|||
final byte[] baseDeletionQualities = read.getBaseDeletionQualities();
|
||||
|
||||
for (int i = 0; i < baseQualities.length; i++) {
|
||||
values.addCovariate((long)baseQualities[i], (long)baseInsertionQualities[i], (long)baseDeletionQualities[i], i);
|
||||
values.addCovariate((int)baseQualities[i], (int)baseInsertionQualities[i], (int)baseDeletionQualities[i], i);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -60,17 +59,12 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String formatKey(final long key) {
|
||||
public String formatKey(final int key) {
|
||||
return String.format("%d", key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long longFromKey(final Object key) {
|
||||
return (key instanceof String) ? (long)Byte.parseByte((String) key) : (long)(Byte) key;
|
||||
public int keyFromValue(final Object value) {
|
||||
return (value instanceof String) ? (int)Byte.parseByte((String) value) : (int)(Byte) value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numberOfBits() {
|
||||
return BQSRKeyManager.numberOfBitsToRepresent(QualityUtils.MAX_QUAL_SCORE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,13 +1,14 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.recalibration.QualQuantizer;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Class that encapsulates the information necessary for quality score quantization for BQSR
|
||||
|
|
@ -30,25 +31,17 @@ public class QuantizationInfo {
|
|||
this(quantizedQuals, empiricalQualCounts, calculateQuantizationLevels(quantizedQuals));
|
||||
}
|
||||
|
||||
public QuantizationInfo(Map<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap, int quantizationLevels) {
|
||||
public QuantizationInfo(final RecalibrationTables recalibrationTables, final int quantizationLevels) {
|
||||
final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution
|
||||
for (int i = 0; i < qualHistogram.length; i++)
|
||||
qualHistogram[i] = 0L;
|
||||
|
||||
Map<Long, RecalDatum> qualTable = null; // look for the quality score table
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
|
||||
BQSRKeyManager keyManager = entry.getKey();
|
||||
if (keyManager.getNumRequiredCovariates() == 2) // it should be the only one with 2 required covariates
|
||||
qualTable = entry.getValue();
|
||||
}
|
||||
final NestedHashMap qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); // get the quality score table
|
||||
|
||||
if (qualTable == null)
|
||||
throw new ReviewedStingException("Could not find QualityScore table.");
|
||||
|
||||
for (RecalDatum datum : qualTable.values()) {
|
||||
int empiricalQual = (int) Math.round(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL )
|
||||
long nObservations = datum.numObservations;
|
||||
qualHistogram[empiricalQual] += nObservations; // add the number of observations for every key
|
||||
for (final Object value : qualTable.getAllValues()) {
|
||||
final RecalDatum datum = (RecalDatum)value;
|
||||
final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL )
|
||||
qualHistogram[empiricalQual] += datum.numObservations; // add the number of observations for every key
|
||||
}
|
||||
empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities
|
||||
quantizeQualityScores(quantizationLevels);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
/**
|
||||
* The object temporarily held by a read that describes all of it's covariates.
|
||||
*
|
||||
|
|
@ -11,65 +9,56 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
* @since 2/8/12
|
||||
*/
|
||||
public class ReadCovariates {
|
||||
private final long[][] mismatchesKeySet;
|
||||
private final long[][] insertionsKeySet;
|
||||
private final long[][] deletionsKeySet;
|
||||
private final int[][][] keys;
|
||||
|
||||
private int currentCovariateIndex = 0;
|
||||
|
||||
public ReadCovariates(int readLength, int numberOfCovariates) {
|
||||
this.mismatchesKeySet = new long[readLength][numberOfCovariates];
|
||||
this.insertionsKeySet = new long[readLength][numberOfCovariates];
|
||||
this.deletionsKeySet = new long[readLength][numberOfCovariates];
|
||||
public ReadCovariates(final int readLength, final int numberOfCovariates) {
|
||||
keys = new int[EventType.values().length][readLength][numberOfCovariates];
|
||||
}
|
||||
|
||||
public void setCovariateIndex(final int index) {
|
||||
currentCovariateIndex = index;
|
||||
}
|
||||
|
||||
public void addCovariate(final long mismatch, final long insertion, final long deletion, final int readOffset) {
|
||||
mismatchesKeySet[readOffset][currentCovariateIndex] = mismatch;
|
||||
insertionsKeySet[readOffset][currentCovariateIndex] = insertion;
|
||||
deletionsKeySet[readOffset][currentCovariateIndex] = deletion;
|
||||
public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) {
|
||||
keys[EventType.BASE_SUBSTITUTION.index][readOffset][currentCovariateIndex] = mismatch;
|
||||
keys[EventType.BASE_INSERTION.index][readOffset][currentCovariateIndex] = insertion;
|
||||
keys[EventType.BASE_DELETION.index][readOffset][currentCovariateIndex] = deletion;
|
||||
}
|
||||
|
||||
public long[] getKeySet(final int readPosition, final EventType errorModel) {
|
||||
switch (errorModel) {
|
||||
case BASE_SUBSTITUTION:
|
||||
return getMismatchesKeySet(readPosition);
|
||||
case BASE_INSERTION:
|
||||
return getInsertionsKeySet(readPosition);
|
||||
case BASE_DELETION:
|
||||
return getDeletionsKeySet(readPosition);
|
||||
default:
|
||||
throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel);
|
||||
}
|
||||
public int[] getKeySet(final int readPosition, final EventType errorModel) {
|
||||
return keys[errorModel.index][readPosition];
|
||||
}
|
||||
|
||||
public long[] getMismatchesKeySet(final int readPosition) {
|
||||
return mismatchesKeySet[readPosition];
|
||||
public int[][] getKeySet(final EventType errorModel) {
|
||||
return keys[errorModel.index];
|
||||
}
|
||||
|
||||
public long[] getInsertionsKeySet(final int readPosition) {
|
||||
return insertionsKeySet[readPosition];
|
||||
public int[] getMismatchesKeySet(final int readPosition) {
|
||||
return keys[EventType.BASE_SUBSTITUTION.index][readPosition];
|
||||
}
|
||||
|
||||
public long[] getDeletionsKeySet(final int readPosition) {
|
||||
return deletionsKeySet[readPosition];
|
||||
public int[] getInsertionsKeySet(final int readPosition) {
|
||||
return keys[EventType.BASE_INSERTION.index][readPosition];
|
||||
}
|
||||
|
||||
public int[] getDeletionsKeySet(final int readPosition) {
|
||||
return keys[EventType.BASE_DELETION.index][readPosition];
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing routines
|
||||
*/
|
||||
protected long[][] getMismatchesKeySet() {
|
||||
return mismatchesKeySet;
|
||||
protected int[][] getMismatchesKeySet() {
|
||||
return keys[EventType.BASE_SUBSTITUTION.index];
|
||||
}
|
||||
|
||||
protected long[][] getInsertionsKeySet() {
|
||||
return insertionsKeySet;
|
||||
protected int[][] getInsertionsKeySet() {
|
||||
return keys[EventType.BASE_INSERTION.index];
|
||||
}
|
||||
|
||||
protected long[][] getDeletionsKeySet() {
|
||||
return deletionsKeySet;
|
||||
protected int[][] getDeletionsKeySet() {
|
||||
return keys[EventType.BASE_DELETION.index];
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,9 +40,9 @@ import java.util.HashMap;
|
|||
|
||||
public class ReadGroupCovariate implements RequiredCovariate {
|
||||
|
||||
private final HashMap<String, Long> readGroupLookupTable = new HashMap<String, Long>();
|
||||
private final HashMap<Long, String> readGroupReverseLookupTable = new HashMap<Long, String>();
|
||||
private long nextId = 0L;
|
||||
private final HashMap<String, Integer> readGroupLookupTable = new HashMap<String, Integer>();
|
||||
private final HashMap<Integer, String> readGroupReverseLookupTable = new HashMap<Integer, String>();
|
||||
private int nextId = 0;
|
||||
|
||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
@Override
|
||||
|
|
@ -51,7 +51,7 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
|||
@Override
|
||||
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||
final String readGroupId = readGroupValueFromRG(read.getReadGroup());
|
||||
final long key = keyForReadGroup(readGroupId);
|
||||
final int key = keyForReadGroup(readGroupId);
|
||||
|
||||
final int l = read.getReadLength();
|
||||
for (int i = 0; i < l; i++)
|
||||
|
|
@ -64,21 +64,16 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String formatKey(final long key) {
|
||||
public String formatKey(final int key) {
|
||||
return readGroupReverseLookupTable.get(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long longFromKey(Object key) {
|
||||
return keyForReadGroup((String) key);
|
||||
public int keyFromValue(final Object value) {
|
||||
return keyForReadGroup((String) value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numberOfBits() {
|
||||
return BQSRKeyManager.numberOfBitsToRepresent(Short.MAX_VALUE);
|
||||
}
|
||||
|
||||
private long keyForReadGroup(final String readGroupId) {
|
||||
private int keyForReadGroup(final String readGroupId) {
|
||||
if (!readGroupLookupTable.containsKey(readGroupId)) {
|
||||
readGroupLookupTable.put(readGroupId, nextId);
|
||||
readGroupReverseLookupTable.put(nextId, readGroupId);
|
||||
|
|
|
|||
|
|
@ -32,11 +32,13 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
|||
import org.broadinstitute.sting.utils.R.RScriptExecutor;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.io.Resource;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
@ -82,6 +84,14 @@ public class RecalDataManager {
|
|||
|
||||
private static final String SCRIPT_FILE = "BQSR.R";
|
||||
|
||||
private static final Pair<String, String> covariateValue = new Pair<String, String>(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME, "%s");
|
||||
private static final Pair<String, String> covariateName = new Pair<String, String>(RecalDataManager.COVARIATE_NAME_COLUMN_NAME, "%s");
|
||||
private static final Pair<String, String> eventType = new Pair<String, String>(RecalDataManager.EVENT_TYPE_COLUMN_NAME, "%s");
|
||||
private static final Pair<String, String> empiricalQuality = new Pair<String, String>(RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f");
|
||||
private static final Pair<String, String> estimatedQReported = new Pair<String, String>(RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f");
|
||||
private static final Pair<String, String> nObservations = new Pair<String, String>(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d");
|
||||
private static final Pair<String, String> nErrors = new Pair<String, String>(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME, "%d");
|
||||
|
||||
|
||||
public enum SOLID_RECAL_MODE {
|
||||
/**
|
||||
|
|
@ -141,30 +151,6 @@ public class RecalDataManager {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Initializes the recalibration table -> key manager map
|
||||
*
|
||||
* @param requiredCovariates list of required covariates (in order)
|
||||
* @param optionalCovariates list of optional covariates (in order)
|
||||
* @return a map with each key manager and it's corresponding recalibration table properly initialized
|
||||
*/
|
||||
public static LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> initializeTables(ArrayList<Covariate> requiredCovariates, ArrayList<Covariate> optionalCovariates) {
|
||||
final LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> tablesAndKeysMap = new LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>>();
|
||||
final ArrayList<Covariate> requiredCovariatesToAdd = new ArrayList<Covariate>(requiredCovariates.size() + 1); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates.
|
||||
final ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
|
||||
for (Covariate covariate : requiredCovariates) {
|
||||
requiredCovariatesToAdd.add(covariate);
|
||||
final Map<Long, RecalDatum> recalTable = new HashMap<Long, RecalDatum>(); // initializing a new recal table for each required covariate (cumulatively)
|
||||
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
|
||||
tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
|
||||
}
|
||||
final Map<Long, RecalDatum> recalTable = new HashMap<Long, RecalDatum>(Short.MAX_VALUE); // initializing a new recal table to hold all optional covariates
|
||||
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager
|
||||
tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
|
||||
return tablesAndKeysMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates two lists : required covariates and optional covariates based on the user's requests.
|
||||
*
|
||||
|
|
@ -223,42 +209,29 @@ public class RecalDataManager {
|
|||
logger.info("");
|
||||
}
|
||||
|
||||
private static List<GATKReportTable> generateReportTables(Map<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap) {
|
||||
private static List<GATKReportTable> generateReportTables(final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) {
|
||||
List<GATKReportTable> result = new LinkedList<GATKReportTable>();
|
||||
int tableIndex = 0;
|
||||
|
||||
final Pair<String, String> covariateValue = new Pair<String, String>(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME, "%s");
|
||||
final Pair<String, String> covariateName = new Pair<String, String>(RecalDataManager.COVARIATE_NAME_COLUMN_NAME, "%s");
|
||||
final Pair<String, String> eventType = new Pair<String, String>(RecalDataManager.EVENT_TYPE_COLUMN_NAME, "%s");
|
||||
final Pair<String, String> empiricalQuality = new Pair<String, String>(RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f");
|
||||
final Pair<String, String> estimatedQReported = new Pair<String, String>(RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f");
|
||||
final Pair<String, String> nObservations = new Pair<String, String>(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d");
|
||||
final Pair<String, String> nErrors = new Pair<String, String>(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME, "%d");
|
||||
final Map<Covariate, String> covariateNameMap = new HashMap<Covariate, String>(requestedCovariates.length);
|
||||
for (final Covariate covariate : requestedCovariates)
|
||||
covariateNameMap.put(covariate, parseCovariateName(covariate));
|
||||
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
|
||||
final BQSRKeyManager keyManager = entry.getKey();
|
||||
final Map<Long, RecalDatum> recalTable = entry.getValue();
|
||||
for (final RecalibrationTables.TableType type : RecalibrationTables.TableType.values()) {
|
||||
|
||||
final boolean isReadGroupTable = tableIndex == 0; // special case for the read group table so we can print the extra column it needs.
|
||||
|
||||
final Covariate[] requiredList = keyManager.getRequiredCovariates(); // ask the key manager what required covariates were used in this recal table
|
||||
final Covariate[] optionalList = keyManager.getOptionalCovariates(); // ask the key manager what optional covariates were used in this recal table
|
||||
|
||||
final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
|
||||
|
||||
for (final Covariate covariate : requiredList) {
|
||||
final String name = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the covariate names and put them in order
|
||||
columnNames.add(new Pair<String,String>(name, "%s")); // save the required covariate name so we can reference it in the future
|
||||
}
|
||||
|
||||
if (optionalList.length > 0) {
|
||||
columnNames.add(covariateValue);
|
||||
columnNames.add(covariateName);
|
||||
final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
|
||||
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[0]), "%s")); // save the required covariate name so we can reference it in the future
|
||||
if (type != RecalibrationTables.TableType.READ_GROUP_TABLE) {
|
||||
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[1]), "%s")); // save the required covariate name so we can reference it in the future
|
||||
if (type == RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLE) {
|
||||
columnNames.add(covariateValue);
|
||||
columnNames.add(covariateName);
|
||||
}
|
||||
}
|
||||
|
||||
columnNames.add(eventType); // the order of these column names is important here
|
||||
columnNames.add(empiricalQuality);
|
||||
if (isReadGroupTable)
|
||||
if (type == RecalibrationTables.TableType.READ_GROUP_TABLE)
|
||||
columnNames.add(estimatedQReported); // only the read group table needs the estimated Q reported
|
||||
columnNames.add(nObservations);
|
||||
columnNames.add(nErrors);
|
||||
|
|
@ -269,42 +242,59 @@ public class RecalDataManager {
|
|||
|
||||
int rowIndex = 0;
|
||||
|
||||
for (Map.Entry<Long, RecalDatum> recalTableEntry : recalTable.entrySet()) { // create a map with column name => key value for all covariate keys
|
||||
final Long bitSetKey = recalTableEntry.getKey();
|
||||
final Map<String, Object> columnData = new HashMap<String, Object>(columnNames.size());
|
||||
final Iterator<Pair<String, String>> iterator = columnNames.iterator();
|
||||
for (final Object key : keyManager.keySetFrom(bitSetKey)) {
|
||||
final String columnName = iterator.next().getFirst();
|
||||
columnData.put(columnName, key);
|
||||
}
|
||||
final RecalDatum datum = recalTableEntry.getValue();
|
||||
columnData.put(iterator.next().getFirst(), datum.getEmpiricalQuality());
|
||||
if (isReadGroupTable)
|
||||
columnData.put(iterator.next().getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
|
||||
columnData.put(iterator.next().getFirst(), datum.numObservations);
|
||||
columnData.put(iterator.next().getFirst(), datum.numMismatches);
|
||||
final NestedHashMap table = recalibrationTables.getTable(type);
|
||||
for (final NestedHashMap.Leaf row : table.getAllLeaves()) {
|
||||
final RecalDatum datum = (RecalDatum)row.value;
|
||||
final List<Object> keys = row.keys;
|
||||
|
||||
for (final Map.Entry<String, Object> dataEntry : columnData.entrySet()) {
|
||||
final String columnName = dataEntry.getKey();
|
||||
final Object value = dataEntry.getValue();
|
||||
reportTable.set(rowIndex, columnName, value.toString());
|
||||
int columnIndex = 0;
|
||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex).getFirst(), requestedCovariates[0].formatKey((Integer)keys.get(columnIndex++)));
|
||||
if (type != RecalibrationTables.TableType.READ_GROUP_TABLE) {
|
||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex).getFirst(), requestedCovariates[1].formatKey((Integer) keys.get(columnIndex++)));
|
||||
if (type == RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLE) {
|
||||
final int covariateIndex = (Integer)keys.get(columnIndex);
|
||||
final Covariate covariate = requestedCovariates[2 + covariateIndex];
|
||||
final int covariateKey = (Integer)keys.get(columnIndex+1);
|
||||
|
||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), covariate.formatKey(covariateKey));
|
||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), covariateNameMap.get(covariate));
|
||||
}
|
||||
}
|
||||
|
||||
final EventType event = EventType.eventFrom((Integer)keys.get(columnIndex));
|
||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), event);
|
||||
|
||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality());
|
||||
if (type == RecalibrationTables.TableType.READ_GROUP_TABLE)
|
||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
|
||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), datum.numObservations);
|
||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex).getFirst(), datum.numMismatches);
|
||||
|
||||
rowIndex++;
|
||||
}
|
||||
result.add(reportTable);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public static void outputRecalibrationReport(RecalibrationArgumentCollection RAC, QuantizationInfo quantizationInfo, Map<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap, PrintStream outputFile) {
|
||||
outputRecalibrationReport(RAC.generateReportTable(), quantizationInfo.generateReportTable(), generateReportTables(keysAndTablesMap), outputFile);
|
||||
private static String parseCovariateName(final Covariate covariate) {
|
||||
return covariate.getClass().getSimpleName().split("Covariate")[0];
|
||||
}
|
||||
|
||||
public static void outputRecalibrationReport(GATKReportTable argumentTable, QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager,Map<Long, RecalDatum>> keysAndTablesMap, PrintStream outputFile) {
|
||||
outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(keysAndTablesMap), outputFile);
|
||||
private static void setReportTableCell(final GATKReportTable reportTable, final int rowIndex, final String columnName, final Object value) {
|
||||
reportTable.set(rowIndex, columnName, value.toString());
|
||||
}
|
||||
|
||||
private static void outputRecalibrationReport(GATKReportTable argumentTable, GATKReportTable quantizationTable, List<GATKReportTable> recalTables, PrintStream outputFile) {
|
||||
public static void outputRecalibrationReport(final RecalibrationArgumentCollection RAC, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) {
|
||||
outputRecalibrationReport(RAC.generateReportTable(), quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile);
|
||||
}
|
||||
|
||||
public static void outputRecalibrationReport(final GATKReportTable argumentTable, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) {
|
||||
outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile);
|
||||
}
|
||||
|
||||
private static void outputRecalibrationReport(final GATKReportTable argumentTable, final GATKReportTable quantizationTable, final List<GATKReportTable> recalTables, final PrintStream outputFile) {
|
||||
final GATKReport report = new GATKReport();
|
||||
report.addTable(argumentTable);
|
||||
report.addTable(quantizationTable);
|
||||
|
|
@ -340,108 +330,87 @@ public class RecalDataManager {
|
|||
|
||||
}
|
||||
|
||||
public static void generateRecalibrationPlot(File filename, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> original, boolean keepIntermediates) {
|
||||
public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final Covariate[] requestedCovariates, final boolean keepIntermediates) {
|
||||
final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
|
||||
writeCSV(files.getFirst(), original, "ORIGINAL", true);
|
||||
writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, true);
|
||||
outputRecalibrationPlot(files, keepIntermediates);
|
||||
}
|
||||
|
||||
public static void generateRecalibrationPlot(File filename, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> original, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> recalibrated, boolean keepIntermediates) {
|
||||
public static void generateRecalibrationPlot(final File filename, final RecalibrationTables original, final RecalibrationTables recalibrated, final Covariate[] requestedCovariates, final boolean keepIntermediates) {
|
||||
final Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
|
||||
writeCSV(files.getFirst(), recalibrated, "RECALIBRATED", true);
|
||||
writeCSV(files.getFirst(), original, "ORIGINAL", false);
|
||||
writeCSV(files.getFirst(), recalibrated, "RECALIBRATED", requestedCovariates, true);
|
||||
writeCSV(files.getFirst(), original, "ORIGINAL", requestedCovariates, false);
|
||||
outputRecalibrationPlot(files, keepIntermediates);
|
||||
}
|
||||
|
||||
private static void writeCSV(PrintStream deltaTableFile, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> map, String recalibrationMode, boolean printHeader) {
|
||||
final int QUALITY_SCORE_COVARIATE_INDEX = 1;
|
||||
final Map<Long, RecalDatum> deltaTable = new HashMap<Long, RecalDatum>();
|
||||
BQSRKeyManager deltaKeyManager = null;
|
||||
private static void writeCSV(final PrintStream deltaTableFile, final RecalibrationTables recalibrationTables, final String recalibrationMode, final Covariate[] requestedCovariates, final boolean printHeader) {
|
||||
final NestedHashMap deltaTable = new NestedHashMap();
|
||||
|
||||
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> tableEntry : map.entrySet()) {
|
||||
final BQSRKeyManager keyManager = tableEntry.getKey();
|
||||
|
||||
if (keyManager.getNumOptionalCovariates() > 0) { // initialize with the 'all covariates' table
|
||||
// create a key manager for the delta table
|
||||
final List<Covariate> requiredCovariates = Arrays.asList(keyManager.getRequiredCovariates()[0]); // include the read group covariate as the only required covariate
|
||||
final List<Covariate> optionalCovariates = new ArrayList<Covariate>();
|
||||
optionalCovariates.add(keyManager.getRequiredCovariates()[1]); // include the quality score covariate as an optional covariate
|
||||
optionalCovariates.addAll(Arrays.asList(keyManager.getOptionalCovariates())); // include all optional covariates
|
||||
deltaKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initialize the key manager
|
||||
}
|
||||
// add the quality score table to the delta table
|
||||
final NestedHashMap qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
|
||||
for (final NestedHashMap.Leaf leaf : qualTable.getAllLeaves()) { // go through every element in the covariates table to create the delta table
|
||||
final List<Object> newCovs = new ArrayList<Object>(4);
|
||||
newCovs.add(leaf.keys.get(0));
|
||||
newCovs.add(requestedCovariates.length); // replace the covariate name with an arbitrary (unused) index for QualityScore
|
||||
newCovs.add(leaf.keys.get(1));
|
||||
newCovs.add(leaf.keys.get(2));
|
||||
addToDeltaTable(deltaTable, newCovs.toArray(), (RecalDatum)leaf.value); // add this covariate to the delta table
|
||||
}
|
||||
|
||||
if (deltaKeyManager == null)
|
||||
throw new ReviewedStingException ("Couldn't find the covariates table");
|
||||
|
||||
boolean readyToPrint = false;
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> tableEntry : map.entrySet()) {
|
||||
final BQSRKeyManager keyManager = tableEntry.getKey();
|
||||
|
||||
if (keyManager.getNumRequiredCovariates() == 2 && keyManager.getNumOptionalCovariates() == 0) { // look for the QualityScore table
|
||||
final Map<Long, RecalDatum> table = tableEntry.getValue();
|
||||
|
||||
// add the quality score table to the delta table
|
||||
for (final Map.Entry<Long, RecalDatum> entry : table.entrySet()) { // go through every element in the covariates table to create the delta table
|
||||
final RecalDatum recalDatum = entry.getValue(); // the current element (recal datum)
|
||||
|
||||
final List<Object> covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key
|
||||
final List<Object> newCovs = new ArrayList<Object>(4);
|
||||
newCovs.add(0, covs.get(0)); // replace the covariate value with the quality score
|
||||
newCovs.add(1, covs.get(1));
|
||||
newCovs.add(2, "QualityScore"); // replace the covariate name with QualityScore (for the QualityScore covariate)
|
||||
newCovs.add(3, covs.get(2));
|
||||
final long deltaKey = deltaKeyManager.longFromKey(newCovs.toArray()); // create a new bitset key for the delta table
|
||||
addToDeltaTable(deltaTable, deltaKey, recalDatum); // add this covariate to the delta table
|
||||
}
|
||||
}
|
||||
|
||||
else if (keyManager.getNumOptionalCovariates() > 0) { // look for the optional covariates table
|
||||
final Map<Long, RecalDatum> table = tableEntry.getValue();
|
||||
|
||||
// add the optional covariates to the delta table
|
||||
for (final Map.Entry<Long, RecalDatum> entry : table.entrySet()) { // go through every element in the covariates table to create the delta table
|
||||
final RecalDatum recalDatum = entry.getValue(); // the current element (recal datum)
|
||||
|
||||
final List<Object> covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key
|
||||
covs.remove(QUALITY_SCORE_COVARIATE_INDEX); // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS)
|
||||
final long deltaKey = deltaKeyManager.longFromKey(covs.toArray()); // create a new bitset key for the delta table
|
||||
addToDeltaTable(deltaTable, deltaKey, recalDatum); // add this covariate to the delta table
|
||||
}
|
||||
readyToPrint = true;
|
||||
}
|
||||
|
||||
// output the csv file
|
||||
if (readyToPrint) {
|
||||
|
||||
if (printHeader) {
|
||||
final List<String> header = new LinkedList<String>();
|
||||
header.add("ReadGroup");
|
||||
header.add("CovariateValue");
|
||||
header.add("CovariateName");
|
||||
header.add("EventType");
|
||||
header.add("Observations");
|
||||
header.add("Errors");
|
||||
header.add("EmpiricalQuality");
|
||||
header.add("AverageReportedQuality");
|
||||
header.add("Accuracy");
|
||||
header.add("Recalibration");
|
||||
deltaTableFile.println(Utils.join(",", header));
|
||||
}
|
||||
|
||||
// print each data line
|
||||
for (final Map.Entry<Long, RecalDatum> deltaEntry : deltaTable.entrySet()) {
|
||||
final List<Object> deltaKeys = deltaKeyManager.keySetFrom(deltaEntry.getKey());
|
||||
final RecalDatum deltaDatum = deltaEntry.getValue();
|
||||
deltaTableFile.print(Utils.join(",", deltaKeys));
|
||||
deltaTableFile.print("," + deltaDatum.stringForCSV());
|
||||
deltaTableFile.println("," + recalibrationMode);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// add the optional covariates to the delta table
|
||||
final NestedHashMap covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLE);
|
||||
for (final NestedHashMap.Leaf leaf : covTable.getAllLeaves()) {
|
||||
final List<Object> covs = new ArrayList<Object>(leaf.keys);
|
||||
covs.remove(1); // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS)
|
||||
addToDeltaTable(deltaTable, covs.toArray(), (RecalDatum)leaf.value); // add this covariate to the delta table
|
||||
}
|
||||
|
||||
// output the csv file
|
||||
if (printHeader) {
|
||||
final List<String> header = new LinkedList<String>();
|
||||
header.add("ReadGroup");
|
||||
header.add("CovariateValue");
|
||||
header.add("CovariateName");
|
||||
header.add("EventType");
|
||||
header.add("Observations");
|
||||
header.add("Errors");
|
||||
header.add("EmpiricalQuality");
|
||||
header.add("AverageReportedQuality");
|
||||
header.add("Accuracy");
|
||||
header.add("Recalibration");
|
||||
deltaTableFile.println(Utils.join(",", header));
|
||||
}
|
||||
|
||||
final Map<Covariate, String> covariateNameMap = new HashMap<Covariate, String>(requestedCovariates.length);
|
||||
for (final Covariate covariate : requestedCovariates)
|
||||
covariateNameMap.put(covariate, parseCovariateName(covariate));
|
||||
|
||||
// print each data line
|
||||
for (final NestedHashMap.Leaf leaf : deltaTable.getAllLeaves()) {
|
||||
final List<Object> deltaKeys = generateValuesFromKeys(leaf.keys, requestedCovariates, covariateNameMap);
|
||||
final RecalDatum deltaDatum = (RecalDatum)leaf.value;
|
||||
deltaTableFile.print(Utils.join(",", deltaKeys));
|
||||
deltaTableFile.print("," + deltaDatum.stringForCSV());
|
||||
deltaTableFile.println("," + recalibrationMode);
|
||||
}
|
||||
}
|
||||
|
||||
private static List<Object> generateValuesFromKeys(final List<Object> keys, final Covariate[] covariates, final Map<Covariate, String> covariateNameMap) {
|
||||
final List<Object> values = new ArrayList<Object>(4);
|
||||
values.add(covariates[0].formatKey((Integer)keys.get(0)));
|
||||
|
||||
// TODO -- create static final variables to hold the indexes of the RG, qual, cov ID, etc.
|
||||
|
||||
final int covariateIndex = (Integer)keys.get(1);
|
||||
final Covariate covariate = covariateIndex == covariates.length ? covariates[1] : covariates[2 + covariateIndex];
|
||||
final int covariateKey = (Integer)keys.get(2);
|
||||
values.add(covariate.formatKey(covariateKey));
|
||||
values.add(covariateNameMap.get(covariate));
|
||||
|
||||
final EventType event = EventType.eventFrom((Integer)keys.get(3));
|
||||
values.add(event);
|
||||
|
||||
return values;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -453,15 +422,14 @@ public class RecalDataManager {
|
|||
* @param deltaKey the key to the table
|
||||
* @param recalDatum the recal datum to combine with the accuracyDatum element in the table
|
||||
*/
|
||||
private static void addToDeltaTable(Map<Long, RecalDatum> deltaTable, Long deltaKey, RecalDatum recalDatum) {
|
||||
final RecalDatum deltaDatum = deltaTable.get(deltaKey); // check if we already have a RecalDatum for this key
|
||||
private static void addToDeltaTable(final NestedHashMap deltaTable, final Object[] deltaKey, final RecalDatum recalDatum) {
|
||||
final RecalDatum deltaDatum = (RecalDatum)deltaTable.get(deltaKey); // check if we already have a RecalDatum for this key
|
||||
if (deltaDatum == null)
|
||||
deltaTable.put(deltaKey, new RecalDatum(recalDatum)); // if we don't have a key yet, create a new one with the same values as the curent datum
|
||||
deltaTable.put(new RecalDatum(recalDatum), deltaKey); // if we don't have a key yet, create a new one with the same values as the curent datum
|
||||
else
|
||||
deltaDatum.combine(recalDatum); // if we do have a datum, combine it with this one.
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Section of code shared between the two recalibration walkers which uses the command line arguments to adjust attributes of the read such as quals or platform string
|
||||
*
|
||||
|
|
@ -627,13 +595,13 @@ public class RecalDataManager {
|
|||
*
|
||||
* @param read The read for which to compute covariate values.
|
||||
* @param requestedCovariates The list of requested covariates.
|
||||
* @param readCovariates The object to store the covariate values
|
||||
* @param resultsStorage The object to store the covariate values
|
||||
*/
|
||||
public static void computeCovariates(final GATKSAMRecord read, final Covariate[] requestedCovariates, final ReadCovariates readCovariates) {
|
||||
public static void computeCovariates(final GATKSAMRecord read, final Covariate[] requestedCovariates, final ReadCovariates resultsStorage) {
|
||||
// Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
|
||||
for (int i = 0; i < requestedCovariates.length; i++) {
|
||||
readCovariates.setCovariateIndex(i);
|
||||
requestedCovariates[i].recordValues(read, readCovariates);
|
||||
resultsStorage.setCovariateIndex(i);
|
||||
requestedCovariates[i].recordValues(read, resultsStorage);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -113,8 +113,7 @@ public class RecalDatum extends Datum {
|
|||
return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported());
|
||||
}
|
||||
|
||||
|
||||
private double calcExpectedErrors() {
|
||||
private double calcExpectedErrors() {
|
||||
return (double) this.numObservations * qualToErrorProb(estimatedQReported);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,8 +3,9 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -18,14 +19,19 @@ import java.util.*;
|
|||
*/
|
||||
public class RecalibrationReport {
|
||||
private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done)
|
||||
private final LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
|
||||
private final RecalibrationTables recalibrationTables; // quick access reference to the tables
|
||||
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
||||
private final HashMap<String, Integer> optionalCovariateIndexes;
|
||||
|
||||
private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
|
||||
private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter
|
||||
|
||||
private final Object[] tempRGarray = new Object[2];
|
||||
private final Object[] tempQUALarray = new Object[3];
|
||||
private final Object[] tempCOVarray = new Object[5];
|
||||
|
||||
public RecalibrationReport(final File RECAL_FILE) {
|
||||
GATKReport report = new GATKReport(RECAL_FILE);
|
||||
final GATKReport report = new GATKReport(RECAL_FILE);
|
||||
|
||||
argumentTable = report.getTable(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE);
|
||||
RAC = initializeArgumentCollectionTable(argumentTable);
|
||||
|
|
@ -37,52 +43,39 @@ public class RecalibrationReport {
|
|||
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
|
||||
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
|
||||
requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()];
|
||||
optionalCovariateIndexes = new HashMap<String, Integer>(optionalCovariates.size());
|
||||
int covariateIndex = 0;
|
||||
for (final Covariate covariate : requiredCovariates)
|
||||
requestedCovariates[covariateIndex++] = covariate;
|
||||
for (final Covariate covariate : optionalCovariates)
|
||||
requestedCovariates[covariateIndex++] = covariate;
|
||||
for (final Covariate covariate : optionalCovariates) {
|
||||
requestedCovariates[covariateIndex] = covariate;
|
||||
final String covariateName = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
|
||||
optionalCovariateIndexes.put(covariateName, covariateIndex-2);
|
||||
covariateIndex++;
|
||||
}
|
||||
|
||||
for (Covariate cov : requestedCovariates)
|
||||
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
|
||||
|
||||
keysAndTablesMap = new LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>>();
|
||||
ArrayList<Covariate> requiredCovariatesToAdd = new ArrayList<Covariate>(requiredCovariates.size()); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates.
|
||||
ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
|
||||
for (Covariate covariate : requiredCovariates) {
|
||||
requiredCovariatesToAdd.add(covariate);
|
||||
final Map<Long, RecalDatum> table; // initializing a new recal table for each required covariate (cumulatively)
|
||||
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
|
||||
final GATKReportTable rgReportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE);
|
||||
final NestedHashMap rgTable = parseReadGroupTable(rgReportTable);
|
||||
|
||||
final int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES)
|
||||
final String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check.";
|
||||
if (nRequiredCovariates == 1) { // if there is only one required covariate, this is the read group table
|
||||
final GATKReportTable reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE);
|
||||
table = parseReadGroupTable(keyManager, reportTable);
|
||||
}
|
||||
else if (nRequiredCovariates == 2 && optionalCovariatesToAdd.isEmpty()) { // when we have both required covariates and no optional covariates we're at the QUAL table
|
||||
final GATKReportTable reportTable = report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE);
|
||||
table = parseQualityScoreTable(keyManager, reportTable);
|
||||
}
|
||||
else
|
||||
throw new ReviewedStingException(UNRECOGNIZED_REPORT_TABLE_EXCEPTION);
|
||||
final GATKReportTable qualReportTable = report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE);
|
||||
final NestedHashMap qualTable = parseQualityScoreTable(qualReportTable);
|
||||
|
||||
keysAndTablesMap.put(keyManager, table); // adding the pair key+table to the map
|
||||
}
|
||||
final GATKReportTable covReportTable = report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE);
|
||||
final NestedHashMap covTable = parseAllCovariatesTable(covReportTable);
|
||||
|
||||
|
||||
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager
|
||||
final GATKReportTable reportTable = report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE);
|
||||
final Map<Long, RecalDatum> table = parseAllCovariatesTable(keyManager, reportTable);
|
||||
keysAndTablesMap.put(keyManager, table);
|
||||
recalibrationTables = new RecalibrationTables(rgTable, qualTable, covTable);
|
||||
}
|
||||
|
||||
protected RecalibrationReport(final QuantizationInfo quantizationInfo, final LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap, final GATKReportTable argumentTable, final RecalibrationArgumentCollection RAC) {
|
||||
protected RecalibrationReport(final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final GATKReportTable argumentTable, final RecalibrationArgumentCollection RAC) {
|
||||
this.quantizationInfo = quantizationInfo;
|
||||
this.keysAndTablesMap = keysAndTablesMap;
|
||||
this.recalibrationTables = recalibrationTables;
|
||||
this.argumentTable = argumentTable;
|
||||
this.RAC = RAC;
|
||||
this.requestedCovariates = null;
|
||||
this.optionalCovariateIndexes = null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -98,29 +91,20 @@ public class RecalibrationReport {
|
|||
*
|
||||
* @param other the recalibration report to combine with this one
|
||||
*/
|
||||
public void combine(RecalibrationReport other) {
|
||||
Iterator<Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>>> thisIterator = keysAndTablesMap.entrySet().iterator();
|
||||
public void combine(final RecalibrationReport other) {
|
||||
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> otherEntry : other.getKeysAndTablesMap().entrySet()) {
|
||||
Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> thisEntry = thisIterator.next();
|
||||
for (RecalibrationTables.TableType type : RecalibrationTables.TableType.values()) {
|
||||
final NestedHashMap myTable = recalibrationTables.getTable(type);
|
||||
final NestedHashMap otherTable = other.recalibrationTables.getTable(type);
|
||||
|
||||
final Map<Long, RecalDatum> thisTable = thisEntry.getValue();
|
||||
final BQSRKeyManager thisKeyManager = thisEntry.getKey();
|
||||
final BQSRKeyManager otherKeyManager = otherEntry.getKey();
|
||||
for (final NestedHashMap.Leaf row : otherTable.getAllLeaves()) {
|
||||
final RecalDatum myDatum = (RecalDatum)myTable.get(row.keys);
|
||||
|
||||
for (Map.Entry<Long, RecalDatum> otherTableEntry : otherEntry.getValue().entrySet()) {
|
||||
final RecalDatum otherDatum = otherTableEntry.getValue();
|
||||
final Long otherBitKey = otherTableEntry.getKey();
|
||||
final List<Object> otherObjectKey = otherKeyManager.keySetFrom(otherBitKey);
|
||||
|
||||
final long thisKey = thisKeyManager.longFromKey(otherObjectKey.toArray());
|
||||
final RecalDatum thisDatum = thisTable.get(thisKey);
|
||||
|
||||
if (thisDatum == null)
|
||||
thisTable.put(thisKey, otherDatum);
|
||||
if (myDatum == null)
|
||||
myTable.put(row.value, row.keys);
|
||||
else
|
||||
thisDatum.combine(otherDatum);
|
||||
}
|
||||
myDatum.combine((RecalDatum)row.value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -128,8 +112,8 @@ public class RecalibrationReport {
|
|||
return quantizationInfo;
|
||||
}
|
||||
|
||||
public LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> getKeysAndTablesMap() {
|
||||
return keysAndTablesMap;
|
||||
public RecalibrationTables getRecalibrationTables() {
|
||||
return recalibrationTables;
|
||||
}
|
||||
|
||||
public Covariate[] getRequestedCovariates() {
|
||||
|
|
@ -139,82 +123,87 @@ public class RecalibrationReport {
|
|||
/**
|
||||
* Compiles the list of keys for the Covariates table and uses the shared parsing utility to produce the actual table
|
||||
*
|
||||
* @param keyManager the key manager for this table
|
||||
* @param reportTable the GATKReport table containing data for this table
|
||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
||||
*/
|
||||
private Map<Long, RecalDatum> parseAllCovariatesTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
||||
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(5);
|
||||
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
|
||||
columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
|
||||
columnNamesOrderedList.add(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME);
|
||||
columnNamesOrderedList.add(RecalDataManager.COVARIATE_NAME_COLUMN_NAME);
|
||||
columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME);
|
||||
return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList, false);
|
||||
private NestedHashMap parseAllCovariatesTable(final GATKReportTable reportTable) {
|
||||
final NestedHashMap result = new NestedHashMap();
|
||||
|
||||
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
|
||||
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
|
||||
tempCOVarray[0] = requestedCovariates[0].keyFromValue(rg);
|
||||
final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
|
||||
tempCOVarray[1] = requestedCovariates[1].keyFromValue(qual);
|
||||
final String covName = (String)reportTable.get(i, RecalDataManager.COVARIATE_NAME_COLUMN_NAME);
|
||||
final int covIndex = optionalCovariateIndexes.get(covName);
|
||||
tempCOVarray[2] = covIndex;
|
||||
final Object covValue = reportTable.get(i, RecalDataManager.COVARIATE_VALUE_COLUMN_NAME);
|
||||
tempCOVarray[3] = requestedCovariates[covIndex + 2].keyFromValue(covValue);
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
|
||||
tempCOVarray[4] = event.index;
|
||||
|
||||
result.put(getRecalDatum(reportTable, i, false), tempCOVarray);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Compiles the list of keys for the QualityScore table and uses the shared parsing utility to produce the actual table
|
||||
* @param keyManager the key manager for this table
|
||||
* @param reportTable the GATKReport table containing data for this table
|
||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
||||
*/
|
||||
private Map<Long, RecalDatum> parseQualityScoreTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
||||
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(3);
|
||||
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
|
||||
columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
|
||||
columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME);
|
||||
return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList, false);
|
||||
private NestedHashMap parseQualityScoreTable(final GATKReportTable reportTable) {
|
||||
final NestedHashMap result = new NestedHashMap();
|
||||
|
||||
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
|
||||
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
|
||||
tempQUALarray[0] = requestedCovariates[0].keyFromValue(rg);
|
||||
final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
|
||||
tempQUALarray[1] = requestedCovariates[1].keyFromValue(qual);
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
|
||||
tempQUALarray[2] = event.index;
|
||||
|
||||
result.put(getRecalDatum(reportTable, i, false), tempQUALarray);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compiles the list of keys for the ReadGroup table and uses the shared parsing utility to produce the actual table
|
||||
*
|
||||
* @param keyManager the key manager for this table
|
||||
* @param reportTable the GATKReport table containing data for this table
|
||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
||||
*/
|
||||
private Map<Long, RecalDatum> parseReadGroupTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
||||
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(2);
|
||||
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
|
||||
columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME);
|
||||
return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Shared parsing functionality for all tables.
|
||||
*
|
||||
* @param keyManager the key manager for this table
|
||||
* @param reportTable the GATKReport table containing data for this table
|
||||
* @param columnNamesOrderedList a list of columns to read from the report table and build as key for this particular table
|
||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
||||
*/
|
||||
private Map<Long, RecalDatum> genericRecalTableParsing(BQSRKeyManager keyManager, GATKReportTable reportTable, ArrayList<String> columnNamesOrderedList, boolean hasEstimatedQReportedColumn) {
|
||||
final Map<Long, RecalDatum> result = new HashMap<Long, RecalDatum>(reportTable.getNumRows()*2);
|
||||
private NestedHashMap parseReadGroupTable(final GATKReportTable reportTable) {
|
||||
final NestedHashMap result = new NestedHashMap();
|
||||
|
||||
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
|
||||
final int nKeys = columnNamesOrderedList.size();
|
||||
final Object [] keySet = new Object[nKeys];
|
||||
for (int j = 0; j < nKeys; j++)
|
||||
keySet[j] = reportTable.get(i, columnNamesOrderedList.get(j)); // all these objects are okay in String format, the key manager will handle them correctly (except for the event type (see below)
|
||||
keySet[keySet.length-1] = EventType.eventFrom((String) keySet[keySet.length-1]); // the last key is always the event type. We convert the string ("M", "I" or "D") to an enum object (necessary for the key manager).
|
||||
final long bitKey = keyManager.longFromKey(keySet);
|
||||
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
|
||||
tempRGarray[0] = requestedCovariates[0].keyFromValue(rg);
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
|
||||
tempRGarray[1] = event.index;
|
||||
|
||||
final long nObservations = (Long) reportTable.get(i, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
|
||||
final long nErrors = (Long) reportTable.get(i, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
|
||||
final double empiricalQuality = (Double) reportTable.get(i, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME);
|
||||
|
||||
final double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table
|
||||
(Double) reportTable.get(i, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
|
||||
Byte.parseByte((String) reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table
|
||||
|
||||
final RecalDatum recalDatum = new RecalDatum(nObservations, nErrors, estimatedQReported, empiricalQuality);
|
||||
result.put(bitKey, recalDatum);
|
||||
result.put(getRecalDatum(reportTable, i, true), tempRGarray);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private RecalDatum getRecalDatum(final GATKReportTable reportTable, final int row, final boolean hasEstimatedQReportedColumn) {
|
||||
final long nObservations = (Long) reportTable.get(row, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
|
||||
final long nErrors = (Long) reportTable.get(row, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
|
||||
final double empiricalQuality = (Double) reportTable.get(row, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME);
|
||||
|
||||
final double estimatedQReported = hasEstimatedQReportedColumn ? // the estimatedQreported column only exists in the ReadGroup table
|
||||
(Double) reportTable.get(row, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
|
||||
Byte.parseByte((String) reportTable.get(row, RecalDataManager.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table
|
||||
|
||||
return new RecalDatum(nObservations, nErrors, estimatedQReported, empiricalQuality);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the quantization table from the GATK Report and turns it into a map of original => quantized quality scores
|
||||
*
|
||||
|
|
@ -308,55 +297,21 @@ public class RecalibrationReport {
|
|||
* and quantization of the quality scores during every call of combine(). Very useful for the BQSRGatherer.
|
||||
*/
|
||||
public void calculateEmpiricalAndQuantizedQualities() {
|
||||
for (Map<Long, RecalDatum> table : keysAndTablesMap.values())
|
||||
for (RecalDatum datum : table.values())
|
||||
datum.calcCombinedEmpiricalQuality();
|
||||
for (RecalibrationTables.TableType type : RecalibrationTables.TableType.values()) {
|
||||
final NestedHashMap table = recalibrationTables.getTable(type);
|
||||
for (final Object value : table.getAllValues()) {
|
||||
((RecalDatum)value).calcCombinedEmpiricalQuality();
|
||||
}
|
||||
}
|
||||
|
||||
quantizationInfo = new QuantizationInfo(keysAndTablesMap, RAC.QUANTIZING_LEVELS);
|
||||
quantizationInfo = new QuantizationInfo(recalibrationTables, RAC.QUANTIZING_LEVELS);
|
||||
}
|
||||
|
||||
public void output(PrintStream output) {
|
||||
RecalDataManager.outputRecalibrationReport(argumentTable, quantizationInfo, keysAndTablesMap, output);
|
||||
RecalDataManager.outputRecalibrationReport(argumentTable, quantizationInfo, recalibrationTables, requestedCovariates, output);
|
||||
}
|
||||
|
||||
public RecalibrationArgumentCollection getRAC() {
|
||||
return RAC;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (!(o instanceof RecalibrationReport))
|
||||
return false;
|
||||
RecalibrationReport other = (RecalibrationReport) o;
|
||||
if (this == o)
|
||||
return true;
|
||||
return isEqualTable(this.keysAndTablesMap, other.keysAndTablesMap);
|
||||
}
|
||||
|
||||
private boolean isEqualTable(LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> t1, LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> t2) {
|
||||
if (t1.size() != t2.size())
|
||||
return false;
|
||||
|
||||
final Iterator<Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>>> t1Iterator = t1.entrySet().iterator();
|
||||
final Iterator<Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>>> t2Iterator = t2.entrySet().iterator();
|
||||
|
||||
while (t1Iterator.hasNext() && t2Iterator.hasNext()) {
|
||||
Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> t1MapEntry = t1Iterator.next();
|
||||
Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> t2MapEntry = t2Iterator.next();
|
||||
|
||||
if (!(t1MapEntry.getKey().equals(t2MapEntry.getKey())))
|
||||
return false;
|
||||
|
||||
final Map<Long, RecalDatum> table2 = t2MapEntry.getValue();
|
||||
for (Map.Entry<Long, RecalDatum> t1TableEntry : t1MapEntry.getValue().entrySet()) {
|
||||
final Long t1Key = t1TableEntry.getKey();
|
||||
if (!table2.containsKey(t1Key))
|
||||
return false;
|
||||
final RecalDatum t1Datum = t1TableEntry.getValue();
|
||||
if (!t1Datum.equals(table2.get(t1Key)))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@ package org.broadinstitute.sting.utils;
|
|||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* BaseUtils contains some basic utilities for manipulating nucleotides.
|
||||
*/
|
||||
|
|
@ -47,6 +49,20 @@ public class BaseUtils {
|
|||
public boolean sameBase(int i) { return index == i; }
|
||||
}
|
||||
|
||||
static private final int[] baseIndexMap = new int[256];
|
||||
static {
|
||||
Arrays.fill(baseIndexMap, -1);
|
||||
baseIndexMap['A'] = 0;
|
||||
baseIndexMap['a'] = 0;
|
||||
baseIndexMap['*'] = 0; // the wildcard character counts as an A
|
||||
baseIndexMap['C'] = 1;
|
||||
baseIndexMap['c'] = 1;
|
||||
baseIndexMap['G'] = 2;
|
||||
baseIndexMap['g'] = 2;
|
||||
baseIndexMap['T'] = 3;
|
||||
baseIndexMap['t'] = 3;
|
||||
}
|
||||
|
||||
// todo -- fix me (enums?)
|
||||
public static final byte DELETION_INDEX = 4;
|
||||
public static final byte NO_CALL_INDEX = 5; // (this is 'N')
|
||||
|
|
@ -182,27 +198,7 @@ public class BaseUtils {
|
|||
* @return 0, 1, 2, 3, or -1 if the base can't be understood
|
||||
*/
|
||||
static public int simpleBaseToBaseIndex(byte base) {
|
||||
switch (base) {
|
||||
case '*': // the wildcard character counts as an A
|
||||
case 'A':
|
||||
case 'a':
|
||||
return 0;
|
||||
|
||||
case 'C':
|
||||
case 'c':
|
||||
return 1;
|
||||
|
||||
case 'G':
|
||||
case 'g':
|
||||
return 2;
|
||||
|
||||
case 'T':
|
||||
case 't':
|
||||
return 3;
|
||||
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
return baseIndexMap[base];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -213,27 +209,7 @@ public class BaseUtils {
|
|||
*/
|
||||
@Deprecated
|
||||
static public int simpleBaseToBaseIndex(char base) {
|
||||
switch (base) {
|
||||
case '*': // the wildcard character counts as an A
|
||||
case 'A':
|
||||
case 'a':
|
||||
return 0;
|
||||
|
||||
case 'C':
|
||||
case 'c':
|
||||
return 1;
|
||||
|
||||
case 'G':
|
||||
case 'g':
|
||||
return 2;
|
||||
|
||||
case 'T':
|
||||
case 't':
|
||||
return 3;
|
||||
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
return baseIndexMap[base];
|
||||
}
|
||||
|
||||
static public int extendedBaseToBaseIndex(byte base) {
|
||||
|
|
@ -284,11 +260,6 @@ public class BaseUtils {
|
|||
}
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
static public char baseIndexToSimpleBaseAsChar(int baseIndex) {
|
||||
return (char) baseIndexToSimpleBase(baseIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a base index to a base index representing its cross-talk partner
|
||||
*
|
||||
|
|
|
|||
|
|
@ -25,7 +25,9 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.collections;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
|
|
@ -83,4 +85,53 @@ public class NestedHashMap {
|
|||
|
||||
return value; // todo -- should never reach this point
|
||||
}
|
||||
|
||||
public List<Object> getAllValues() {
|
||||
List<Object> result = new ArrayList<Object>();
|
||||
fillAllValues(data, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
private void fillAllValues(final Map map, final List<Object> result) {
|
||||
for ( Object value : map.values() ) {
|
||||
if ( value == null )
|
||||
continue;
|
||||
if ( value instanceof Map )
|
||||
fillAllValues((Map)value, result);
|
||||
else
|
||||
result.add(value);
|
||||
}
|
||||
}
|
||||
|
||||
public static class Leaf {
|
||||
public final List<Object> keys;
|
||||
public final Object value;
|
||||
|
||||
public Leaf(final List<Object> keys, final Object value) {
|
||||
this.keys = keys;
|
||||
this.value = value;
|
||||
}
|
||||
}
|
||||
|
||||
public List<Leaf> getAllLeaves() {
|
||||
List<Leaf> result = new ArrayList<Leaf>();
|
||||
List<Object> path = new ArrayList<Object>();
|
||||
fillAllLeaves(data, path, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
private void fillAllLeaves(final Map map, final List<Object> path, final List<Leaf> result) {
|
||||
for ( final Object key : map.keySet() ) {
|
||||
final Object value = map.get(key);
|
||||
if ( value == null )
|
||||
continue;
|
||||
final List<Object> newPath = new ArrayList<Object>(path);
|
||||
newPath.add(key);
|
||||
if ( value instanceof Map ) {
|
||||
fillAllLeaves((Map) value, newPath, result);
|
||||
} else {
|
||||
result.add(new Leaf(newPath, value));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,10 +28,10 @@ package org.broadinstitute.sting.utils.recalibration;
|
|||
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Utility methods to facilitate on-the-fly base quality score recalibration.
|
||||
|
|
@ -45,39 +45,15 @@ public class BaseRecalibration {
|
|||
private final ReadCovariates readCovariates;
|
||||
|
||||
private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
|
||||
private final KeysAndTables keysAndTables;
|
||||
private final RecalibrationTables recalibrationTables;
|
||||
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
||||
|
||||
static class KeysAndTables {
|
||||
private final Object[] tempKeySet;
|
||||
|
||||
public enum Type {
|
||||
READ_GROUP_TABLE(0),
|
||||
QUALITY_SCORE_TABLE(1),
|
||||
OPTIONAL_COVARIATE_TABLE(2);
|
||||
|
||||
private final int index;
|
||||
|
||||
private Type(int index) {
|
||||
this.index = index;
|
||||
}
|
||||
}
|
||||
|
||||
public final BQSRKeyManager[] managers = new BQSRKeyManager[Type.values().length];
|
||||
public final Map<Long, RecalDatum>[] tables = new Map[Type.values().length];
|
||||
|
||||
public KeysAndTables(final Map<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap) {
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> mapEntry : keysAndTablesMap.entrySet()) {
|
||||
Type type;
|
||||
if (mapEntry.getKey().getNumRequiredCovariates() == 1)
|
||||
type = Type.READ_GROUP_TABLE;
|
||||
else if (mapEntry.getKey().getNumOptionalCovariates() == 0)
|
||||
type = Type.QUALITY_SCORE_TABLE;
|
||||
else
|
||||
type = Type.OPTIONAL_COVARIATE_TABLE;
|
||||
managers[type.index] = mapEntry.getKey();
|
||||
tables[type.index] = mapEntry.getValue();
|
||||
}
|
||||
}
|
||||
private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
||||
static {
|
||||
for (int i = 0; i < EventType.values().length; i++)
|
||||
qualityScoreByFullCovariateKey[i] = new NestedHashMap();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -89,7 +65,7 @@ public class BaseRecalibration {
|
|||
public BaseRecalibration(final File RECAL_FILE, int quantizationLevels) {
|
||||
RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE);
|
||||
|
||||
keysAndTables = new KeysAndTables(recalibrationReport.getKeysAndTablesMap());
|
||||
recalibrationTables = recalibrationReport.getRecalibrationTables();
|
||||
requestedCovariates = recalibrationReport.getRequestedCovariates();
|
||||
quantizationInfo = recalibrationReport.getQuantizationInfo();
|
||||
if (quantizationLevels == 0) // quantizationLevels == 0 means no quantization, preserve the quality scores
|
||||
|
|
@ -98,20 +74,22 @@ public class BaseRecalibration {
|
|||
quantizationInfo.quantizeQualityScores(quantizationLevels);
|
||||
|
||||
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||
tempKeySet = new Integer[requestedCovariates.length];
|
||||
}
|
||||
|
||||
/**
|
||||
* This constructor only exists for testing purposes.
|
||||
*
|
||||
* @param quantizationInfo the quantization info object
|
||||
* @param keysAndTablesMap the map of key managers and recalibration tables
|
||||
* @param recalibrationTables the map of key managers and recalibration tables
|
||||
* @param requestedCovariates the list of requested covariates
|
||||
*/
|
||||
protected BaseRecalibration(final QuantizationInfo quantizationInfo, final LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap, final Covariate[] requestedCovariates) {
|
||||
protected BaseRecalibration(final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) {
|
||||
this.quantizationInfo = quantizationInfo;
|
||||
keysAndTables = new KeysAndTables(keysAndTablesMap);
|
||||
this.recalibrationTables = recalibrationTables;
|
||||
this.requestedCovariates = requestedCovariates;
|
||||
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||
tempKeySet = new Integer[requestedCovariates.length];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -125,13 +103,20 @@ public class BaseRecalibration {
|
|||
RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read
|
||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||
final byte[] quals = read.getBaseQualities(errorModel);
|
||||
final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model
|
||||
|
||||
final int readLength = read.getReadLength();
|
||||
for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read
|
||||
|
||||
for (int offset = 0; offset < read.getReadLength(); offset++) { // recalibrate all bases in the read
|
||||
final byte originalQualityScore = quals[offset];
|
||||
|
||||
if (originalQualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
||||
final long[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model
|
||||
final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
|
||||
final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model
|
||||
Byte recalibratedQualityScore = (Byte) qualityScoreByFullCovariateKey[errorModel.index].get(wrapKeySet(keySet));
|
||||
if (recalibratedQualityScore == null) {
|
||||
recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
|
||||
qualityScoreByFullCovariateKey[errorModel.index].put(recalibratedQualityScore, keySet);
|
||||
}
|
||||
quals[offset] = recalibratedQualityScore;
|
||||
}
|
||||
}
|
||||
|
|
@ -139,7 +124,11 @@ public class BaseRecalibration {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
private Object[] wrapKeySet(final int[] keySet) {
|
||||
for (int i = 0; i < keySet.length; i++)
|
||||
tempKeySet[i] = keySet[i];
|
||||
return tempKeySet;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements a serial recalibration of the reads using the combinational table.
|
||||
|
|
@ -158,24 +147,23 @@ public class BaseRecalibration {
|
|||
* @param errorModel the event type
|
||||
* @return A recalibrated quality score as a byte
|
||||
*/
|
||||
protected byte performSequentialQualityCalculation(final long[] key, final EventType errorModel) {
|
||||
protected byte performSequentialQualityCalculation(final int[] key, final EventType errorModel) {
|
||||
|
||||
final double globalDeltaQ = calculateGlobalDeltaQ(keysAndTables.managers[KeysAndTables.Type.READ_GROUP_TABLE.index], keysAndTables.tables[KeysAndTables.Type.READ_GROUP_TABLE.index], key, errorModel);
|
||||
final double deltaQReported = calculateDeltaQReported(keysAndTables.managers[KeysAndTables.Type.QUALITY_SCORE_TABLE.index], keysAndTables.tables[KeysAndTables.Type.QUALITY_SCORE_TABLE.index], key, errorModel, globalDeltaQ);
|
||||
final double deltaQCovariates = calculateDeltaQCovariates(keysAndTables.managers[KeysAndTables.Type.OPTIONAL_COVARIATE_TABLE.index], keysAndTables.tables[KeysAndTables.Type.OPTIONAL_COVARIATE_TABLE.index], key, errorModel, globalDeltaQ, deltaQReported);
|
||||
final byte qualFromRead = (byte)(long)key[1];
|
||||
final double globalDeltaQ = calculateGlobalDeltaQ(recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE), key, errorModel);
|
||||
final double deltaQReported = calculateDeltaQReported(recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE), key, errorModel, globalDeltaQ, qualFromRead);
|
||||
final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLE), key, errorModel, globalDeltaQ, deltaQReported, qualFromRead);
|
||||
|
||||
final byte qualFromRead = (byte)key[1];
|
||||
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
|
||||
recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
|
||||
|
||||
return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
|
||||
}
|
||||
|
||||
private double calculateGlobalDeltaQ(final BQSRKeyManager keyManager, final Map<Long, RecalDatum> table, final long[] key, final EventType errorModel) {
|
||||
private double calculateGlobalDeltaQ(final NestedHashMap table, final int[] key, final EventType errorModel) {
|
||||
double result = 0.0;
|
||||
|
||||
final long masterKey = keyManager.createMasterKey(key, errorModel, -1);
|
||||
final RecalDatum empiricalQualRG = table.get(masterKey);
|
||||
final RecalDatum empiricalQualRG = (RecalDatum)table.get(key[0], errorModel.index);
|
||||
if (empiricalQualRG != null) {
|
||||
final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality();
|
||||
final double aggregrateQReported = empiricalQualRG.getEstimatedQReported();
|
||||
|
|
@ -185,32 +173,28 @@ public class BaseRecalibration {
|
|||
return result;
|
||||
}
|
||||
|
||||
private double calculateDeltaQReported(final BQSRKeyManager keyManager, final Map<Long, RecalDatum> table, final long[] key, final EventType errorModel, final double globalDeltaQ) {
|
||||
private double calculateDeltaQReported(final NestedHashMap table, final int[] key, final EventType errorModel, final double globalDeltaQ, final byte qualFromRead) {
|
||||
double result = 0.0;
|
||||
|
||||
final long masterKey = keyManager.createMasterKey(key, errorModel, -1);
|
||||
final RecalDatum empiricalQualQS = table.get(masterKey);
|
||||
final RecalDatum empiricalQualQS = (RecalDatum)table.get(key[0], key[1], errorModel.index);
|
||||
if (empiricalQualQS != null) {
|
||||
final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality();
|
||||
final byte qualFromRead = (byte)key[1];
|
||||
result = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private double calculateDeltaQCovariates(final BQSRKeyManager keyManager, final Map<Long, RecalDatum> table, final long[] key, final EventType errorModel, final double globalDeltaQ, final double deltaQReported) {
|
||||
private double calculateDeltaQCovariates(final NestedHashMap table, final int[] key, final EventType errorModel, final double globalDeltaQ, final double deltaQReported, final byte qualFromRead) {
|
||||
double result = 0.0;
|
||||
|
||||
final int numOptionalCovariates = keyManager.getNumOptionalCovariates();
|
||||
for (int i = 0; i < numOptionalCovariates; i++) {
|
||||
final long masterKey = keyManager.createMasterKey(key, errorModel, i);
|
||||
if (masterKey < 0)
|
||||
// for all optional covariates
|
||||
for (int i = 2; i < requestedCovariates.length; i++) {
|
||||
if (key[i] < 0)
|
||||
continue;
|
||||
final RecalDatum empiricalQualCO = table.get(masterKey);
|
||||
final RecalDatum empiricalQualCO = (RecalDatum)table.get(key[0], key[1], (i-2), key[i], errorModel.index);
|
||||
if (empiricalQualCO != null) {
|
||||
final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
|
||||
final byte qualFromRead = (byte)key[1];
|
||||
result += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,62 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
|
||||
/**
|
||||
* Utility class to facilitate on-the-fly base quality score recalibration.
|
||||
*
|
||||
* User: ebanks
|
||||
* Date: 6/20/12
|
||||
*/
|
||||
|
||||
public class RecalibrationTables {
|
||||
|
||||
public enum TableType {
|
||||
READ_GROUP_TABLE(0),
|
||||
QUALITY_SCORE_TABLE(1),
|
||||
OPTIONAL_COVARIATE_TABLE(2);
|
||||
|
||||
private final int index;
|
||||
|
||||
private TableType(final int index) {
|
||||
this.index = index;
|
||||
}
|
||||
}
|
||||
|
||||
private final NestedHashMap[] tables = new NestedHashMap[TableType.values().length];
|
||||
|
||||
public RecalibrationTables(final NestedHashMap rgMap, final NestedHashMap qualMap, final NestedHashMap covMap) {
|
||||
tables[TableType.READ_GROUP_TABLE.index] = rgMap;
|
||||
tables[TableType.QUALITY_SCORE_TABLE.index] = qualMap;
|
||||
tables[TableType.OPTIONAL_COVARIATE_TABLE.index] = covMap;
|
||||
}
|
||||
|
||||
public NestedHashMap getTable(final TableType type) {
|
||||
return tables[type.index];
|
||||
}
|
||||
}
|
||||
|
|
@ -1,158 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author Mauricio Carneiro
|
||||
* @since 3/7/12
|
||||
*/
|
||||
public class BQSRKeyManagerUnitTest {
|
||||
RecalibrationArgumentCollection RAC;
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
RAC = new RecalibrationArgumentCollection();
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testCombineBitSets() {
|
||||
final int nRequired = 2;
|
||||
final ArrayList<Covariate> covariates = new ArrayList<Covariate>();
|
||||
covariates.add(new ReadGroupCovariate());
|
||||
covariates.add(new QualityScoreCovariate());
|
||||
covariates.add(new CycleCovariate());
|
||||
covariates.add(new ContextCovariate());
|
||||
createReadAndTest(covariates, nRequired);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testOnlyRequiredCovariates() {
|
||||
final int nRequired = 2;
|
||||
final ArrayList<Covariate> covariates = new ArrayList<Covariate>(2);
|
||||
covariates.add(new ReadGroupCovariate());
|
||||
covariates.add(new QualityScoreCovariate());
|
||||
createReadAndTest(covariates, nRequired);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testOnlyOneCovariate() {
|
||||
final int nRequired = 1;
|
||||
final ArrayList<Covariate> covariates = new ArrayList<Covariate>(2);
|
||||
covariates.add(new ReadGroupCovariate());
|
||||
createReadAndTest(covariates, nRequired);
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testOneCovariateWithOptionalCovariates() {
|
||||
final int nRequired = 1;
|
||||
final ArrayList<Covariate> covariates = new ArrayList<Covariate>(4);
|
||||
covariates.add(new ReadGroupCovariate());
|
||||
covariates.add(new QualityScoreCovariate());
|
||||
covariates.add(new CycleCovariate());
|
||||
covariates.add(new ContextCovariate());
|
||||
createReadAndTest(covariates, nRequired);
|
||||
}
|
||||
|
||||
private void createReadAndTest(List<Covariate> covariates, int nRequired) {
|
||||
int readLength = 1000;
|
||||
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(ReadUtils.createRandomReadBases(readLength, true), ReadUtils.createRandomReadQuals(readLength), readLength + "M");
|
||||
read.setReadGroup(new GATKSAMReadGroupRecord("ID"));
|
||||
read.getReadGroup().setPlatform("illumina");
|
||||
|
||||
runTestOnRead(read, covariates, nRequired);
|
||||
read.setReadNegativeStrandFlag(true);
|
||||
runTestOnRead(read, covariates, nRequired);
|
||||
read.setReadPairedFlag(true);
|
||||
read.setSecondOfPairFlag(true);
|
||||
runTestOnRead(read, covariates, nRequired);
|
||||
read.setReadNegativeStrandFlag(false);
|
||||
runTestOnRead(read, covariates, nRequired);
|
||||
}
|
||||
|
||||
private void runTestOnRead(GATKSAMRecord read, List<Covariate> covariateList, int nRequired) {
|
||||
final long[][][] covariateKeys = new long[covariateList.size()][EventType.values().length][read.getReadLength()];
|
||||
ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), covariateList.size());
|
||||
for (int i = 0; i < covariateList.size(); i++) {
|
||||
final Covariate cov = covariateList.get(i);
|
||||
cov.initialize(RAC);
|
||||
readCovariates.setCovariateIndex(i);
|
||||
cov.recordValues(read, readCovariates);
|
||||
}
|
||||
for (int i = 0; i < read.getReadLength(); i++) {
|
||||
for (EventType eventType : EventType.values()) {
|
||||
final long[] vals = readCovariates.getKeySet(i, eventType);
|
||||
for (int j = 0; j < vals.length; j++)
|
||||
covariateKeys[j][eventType.index][i] = vals[j];
|
||||
}
|
||||
}
|
||||
|
||||
List<Covariate> requiredCovariates = new LinkedList<Covariate>();
|
||||
List<Covariate> optionalCovariates = new LinkedList<Covariate>();
|
||||
|
||||
for (int j=0; j<nRequired; j++)
|
||||
requiredCovariates.add(covariateList.get(j));
|
||||
for (int j=nRequired; j<covariateList.size(); j++)
|
||||
optionalCovariates.add(covariateList.get(j));
|
||||
|
||||
BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
|
||||
|
||||
for (int l = 0; l < read.getReadLength(); l++) {
|
||||
for (EventType eventType : EventType.values()) {
|
||||
long[] keySet = new long[covariateList.size()];
|
||||
Object[] expectedRequired = new Object[covariateList.size()];
|
||||
Object[] expectedCovariate = new Object[covariateList.size()];
|
||||
|
||||
for (int j = 0; j < covariateList.size(); j++) {
|
||||
keySet[j] = covariateKeys[j][eventType.index][l];
|
||||
|
||||
if (j < nRequired)
|
||||
expectedRequired[j] = covariateList.get(j).formatKey(keySet[j]);
|
||||
else
|
||||
expectedCovariate[j - nRequired] = covariateList.get(j).formatKey(keySet[j]);
|
||||
}
|
||||
|
||||
if (optionalCovariates.size() == 0) {
|
||||
final long masterKey = keyManager.createMasterKey(keySet, eventType, -1);
|
||||
testKeys(keyManager, masterKey, nRequired, optionalCovariates, expectedRequired, expectedCovariate, eventType, -1);
|
||||
} else {
|
||||
for (int j = 0; j < optionalCovariates.size(); j++) {
|
||||
final long masterKey = keyManager.createMasterKey(keySet, eventType, j);
|
||||
testKeys(keyManager, masterKey, nRequired, optionalCovariates, expectedRequired, expectedCovariate, eventType, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void testKeys(final BQSRKeyManager keyManager, final long key, final int nRequired, final List<Covariate> optionalCovariates,
|
||||
final Object[] expectedRequired, final Object[] expectedCovariate, final EventType eventType, final int index) {
|
||||
|
||||
Object[] actual = keyManager.keySetFrom(key).toArray();
|
||||
|
||||
// Build the expected array
|
||||
Object[] expected = new Object[nRequired + (optionalCovariates.size() > 0 ? 3 : 1)];
|
||||
System.arraycopy(expectedRequired, 0, expected, 0, nRequired);
|
||||
if (optionalCovariates.size() > 0) {
|
||||
expected[expected.length-3] = expectedCovariate[index];
|
||||
expected[expected.length-2] = optionalCovariates.get(index).getClass().getSimpleName().split("Covariate")[0];
|
||||
}
|
||||
expected[expected.length-1] = eventType;
|
||||
|
||||
// System.out.println("Actual : " + Utils.join(",", Arrays.asList(actual)));
|
||||
// System.out.println("Expected: " + Utils.join(",", Arrays.asList(expected)));
|
||||
// System.out.println();
|
||||
|
||||
for (int k = 0; k < expected.length; k++)
|
||||
Assert.assertEquals(actual[k], expected[k]);
|
||||
}
|
||||
}
|
||||
|
|
@ -36,7 +36,7 @@ public class ContextCovariateUnitTest {
|
|||
verifyCovariateArray(readCovariates.getDeletionsKeySet(), RAC.INDELS_CONTEXT_SIZE, clippedRead, covariate);
|
||||
}
|
||||
|
||||
public static void verifyCovariateArray(long[][] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) {
|
||||
public static void verifyCovariateArray(int[][] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) {
|
||||
for (int i = 0; i < values.length; i++)
|
||||
Assert.assertEquals(contextCovariate.formatKey(values[i][0]), expectedContext(read, i, contextSize));
|
||||
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ public class CycleCovariateUnitTest {
|
|||
verifyCovariateArray(readCovariates.getMismatchesKeySet(), -1, -1);
|
||||
}
|
||||
|
||||
private void verifyCovariateArray(long[][] values, int init, int increment) {
|
||||
private void verifyCovariateArray(int[][] values, int init, int increment) {
|
||||
for (short i = 0; i < values.length; i++) {
|
||||
short actual = Short.decode(covariate.formatKey(values[i][0]));
|
||||
int expected = init + (increment * i);
|
||||
|
|
|
|||
|
|
@ -46,8 +46,8 @@ public class ReadGroupCovariateUnitTest {
|
|||
|
||||
}
|
||||
|
||||
private void verifyCovariateArray(long[][] values, String expected) {
|
||||
for (long[] value : values) {
|
||||
private void verifyCovariateArray(int[][] values, String expected) {
|
||||
for (int[] value : values) {
|
||||
String actual = covariate.formatKey(value[0]);
|
||||
Assert.assertEquals(actual, expected);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
@ -32,7 +34,6 @@ public class RecalibrationReportUnitTest {
|
|||
|
||||
final QuantizationInfo quantizationInfo = new QuantizationInfo(quals, counts);
|
||||
final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||
final LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap = new LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>>();
|
||||
|
||||
quantizationInfo.noQuantization();
|
||||
final List<Covariate> requiredCovariates = new LinkedList<Covariate>();
|
||||
|
|
@ -41,14 +42,10 @@ public class RecalibrationReportUnitTest {
|
|||
final ReadGroupCovariate rgCovariate = new ReadGroupCovariate();
|
||||
rgCovariate.initialize(RAC);
|
||||
requiredCovariates.add(rgCovariate);
|
||||
final BQSRKeyManager rgKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
|
||||
keysAndTablesMap.put(rgKeyManager, new HashMap<Long, RecalDatum>());
|
||||
|
||||
final QualityScoreCovariate qsCovariate = new QualityScoreCovariate();
|
||||
qsCovariate.initialize(RAC);
|
||||
requiredCovariates.add(qsCovariate);
|
||||
final BQSRKeyManager qsKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
|
||||
keysAndTablesMap.put(qsKeyManager, new HashMap<Long, RecalDatum>());
|
||||
|
||||
final ContextCovariate cxCovariate = new ContextCovariate();
|
||||
cxCovariate.initialize(RAC);
|
||||
|
|
@ -56,8 +53,6 @@ public class RecalibrationReportUnitTest {
|
|||
final CycleCovariate cyCovariate = new CycleCovariate();
|
||||
cyCovariate.initialize(RAC);
|
||||
optionalCovariates.add(cyCovariate);
|
||||
BQSRKeyManager cvKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
|
||||
keysAndTablesMap.put(cvKeyManager, new HashMap<Long, RecalDatum>());
|
||||
|
||||
final Covariate[] requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()];
|
||||
int covariateIndex = 0;
|
||||
|
|
@ -75,34 +70,35 @@ public class RecalibrationReportUnitTest {
|
|||
readQuals[i] = 20;
|
||||
read.setBaseQualities(readQuals);
|
||||
|
||||
|
||||
final int expectedKeys = expectedNumberOfKeys(4, length, RAC.INDELS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE);
|
||||
int nKeys = 0; // keep track of how many keys were produced
|
||||
final ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates);
|
||||
for (int offset = 0; offset < length; offset++) {
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
|
||||
BQSRKeyManager keyManager = entry.getKey();
|
||||
Map<Long, RecalDatum> table = entry.getValue();
|
||||
|
||||
final int numOptionalCovariates = keyManager.getNumOptionalCovariates();
|
||||
if (numOptionalCovariates == 0) {
|
||||
table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_SUBSTITUTION, -1), RecalDatum.createRandomRecalDatum(10000, 10));
|
||||
table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_INSERTION, -1), RecalDatum.createRandomRecalDatum(100000, 10));
|
||||
table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_DELETION, -1), RecalDatum.createRandomRecalDatum(100000, 10));
|
||||
nKeys += 3;
|
||||
} else {
|
||||
for (int j = 0; j < numOptionalCovariates; j++) {
|
||||
table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_SUBSTITUTION, j), RecalDatum.createRandomRecalDatum(10000, 10));
|
||||
table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_INSERTION, j), RecalDatum.createRandomRecalDatum(100000, 10));
|
||||
table.put(keyManager.createMasterKey(rc.getMismatchesKeySet(offset), EventType.BASE_DELETION, j), RecalDatum.createRandomRecalDatum(100000, 10));
|
||||
nKeys += 3;
|
||||
}
|
||||
final NestedHashMap rgTable = new NestedHashMap();
|
||||
final NestedHashMap qualTable = new NestedHashMap();
|
||||
final NestedHashMap covTable = new NestedHashMap();
|
||||
|
||||
for (int offset = 0; offset < length; offset++) {
|
||||
|
||||
for (EventType errorMode : EventType.values()) {
|
||||
|
||||
final int[] covariates = rc.getKeySet(offset, errorMode);
|
||||
final int randomMax = errorMode == EventType.BASE_SUBSTITUTION ? 10000 : 100000;
|
||||
|
||||
rgTable.put(RecalDatum.createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.index);
|
||||
qualTable.put(RecalDatum.createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.index);
|
||||
nKeys += 2;
|
||||
for (int j = 0; j < optionalCovariates.size(); j++) {
|
||||
covTable.put(RecalDatum.createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], j, covariates[2 + j], errorMode.index);
|
||||
nKeys++;
|
||||
}
|
||||
}
|
||||
}
|
||||
Assert.assertEquals(nKeys, expectedKeys);
|
||||
|
||||
RecalibrationReport report = new RecalibrationReport(quantizationInfo, keysAndTablesMap, RAC.generateReportTable(), RAC);
|
||||
final RecalibrationTables recalibrationTables = new RecalibrationTables(rgTable, qualTable, covTable);
|
||||
|
||||
final RecalibrationReport report = new RecalibrationReport(quantizationInfo, recalibrationTables, RAC.generateReportTable(), RAC);
|
||||
|
||||
File output = new File("RecalibrationReportUnitTestOutuput.grp");
|
||||
PrintStream out;
|
||||
|
|
|
|||
|
|
@ -1,8 +1,6 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.BQSRKeyManager;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.ContextCovariate;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
|
@ -59,19 +57,4 @@ public class BitSetUtilsUnitTest {
|
|||
//for (String d : dna)
|
||||
// Assert.assertEquals(BitSetUtils.dnaFrom(BitSetUtils.bitSetFrom(d)), d);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testNumberOfBitsToRepresent() {
|
||||
Assert.assertEquals(BQSRKeyManager.numberOfBitsToRepresent(0), 0); // Make sure 0 elements need 0 bits to be represented
|
||||
Assert.assertEquals(BQSRKeyManager.numberOfBitsToRepresent(1), 1); // Make sure 1 element needs 1 bit to be represented
|
||||
Assert.assertEquals(BQSRKeyManager.numberOfBitsToRepresent(3), 2); // Make sure 3 elements need 2 bit to be represented
|
||||
|
||||
for (int i = 1; i < 63; i++) { // Can't test i == 63 because n1 is a negative number
|
||||
long n1 = 1L << i;
|
||||
long n2 = Math.abs(random.nextLong()) % n1;
|
||||
long n3 = n1 | n2;
|
||||
Assert.assertEquals(BQSRKeyManager.numberOfBitsToRepresent(n3), (n3 == n1) ? i : i + 1);
|
||||
Assert.assertEquals(BQSRKeyManager.numberOfBitsToRepresent(n1), i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.recalibration;
|
|||
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -22,7 +21,7 @@ import java.util.*;
|
|||
public class BaseRecalibrationUnitTest {
|
||||
|
||||
private org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager dataManager;
|
||||
private LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>> keysAndTablesMap;
|
||||
private RecalibrationTables recalibrationTables;
|
||||
|
||||
private ReadGroupCovariate rgCovariate;
|
||||
private QualityScoreCovariate qsCovariate;
|
||||
|
|
@ -50,19 +49,14 @@ public class BaseRecalibrationUnitTest {
|
|||
List<Covariate> optionalCovariates = new ArrayList<Covariate>();
|
||||
|
||||
dataManager = new org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager(true, 4);
|
||||
keysAndTablesMap = new LinkedHashMap<BQSRKeyManager, Map<Long, RecalDatum>>();
|
||||
|
||||
rgCovariate = new ReadGroupCovariate();
|
||||
rgCovariate.initialize(RAC);
|
||||
requiredCovariates.add(rgCovariate);
|
||||
BQSRKeyManager rgKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
|
||||
keysAndTablesMap.put(rgKeyManager, new HashMap<Long, RecalDatum>());
|
||||
|
||||
qsCovariate = new QualityScoreCovariate();
|
||||
qsCovariate.initialize(RAC);
|
||||
requiredCovariates.add(qsCovariate);
|
||||
BQSRKeyManager qsKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
|
||||
keysAndTablesMap.put(qsKeyManager, new HashMap<Long, RecalDatum>());
|
||||
|
||||
cxCovariate = new ContextCovariate();
|
||||
cxCovariate.initialize(RAC);
|
||||
|
|
@ -70,8 +64,6 @@ public class BaseRecalibrationUnitTest {
|
|||
cyCovariate = new CycleCovariate();
|
||||
cyCovariate.initialize(RAC);
|
||||
optionalCovariates.add(cyCovariate);
|
||||
BQSRKeyManager cvKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
|
||||
keysAndTablesMap.put(cvKeyManager, new HashMap<Long, RecalDatum>());
|
||||
|
||||
final Covariate[] requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()];
|
||||
int covariateIndex = 0;
|
||||
|
|
@ -82,10 +74,13 @@ public class BaseRecalibrationUnitTest {
|
|||
|
||||
readCovariates = RecalDataManager.computeCovariates(read, requestedCovariates);
|
||||
|
||||
for (int i=0; i<read.getReadLength(); i++) {
|
||||
long[] bitKeys = readCovariates.getMismatchesKeySet(i);
|
||||
final NestedHashMap rgTable = new NestedHashMap();
|
||||
final NestedHashMap qualTable = new NestedHashMap();
|
||||
final NestedHashMap covTable = new NestedHashMap();
|
||||
|
||||
Object[] objKey = buildObjectKey(bitKeys);
|
||||
for (int i=0; i<read.getReadLength(); i++) {
|
||||
final int[] bitKeys = readCovariates.getMismatchesKeySet(i);
|
||||
final Object[] objKey = buildObjectKey(bitKeys);
|
||||
|
||||
Random random = new Random();
|
||||
int nObservations = random.nextInt(10000);
|
||||
|
|
@ -97,21 +92,17 @@ public class BaseRecalibrationUnitTest {
|
|||
dataManager.addToAllTables(objKey, oldDatum, QualityUtils.MIN_USABLE_Q_SCORE);
|
||||
|
||||
RecalDatum newDatum = new RecalDatum(nObservations, nErrors, estimatedQReported, empiricalQuality);
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> mapEntry : keysAndTablesMap.entrySet()) {
|
||||
final BQSRKeyManager keyManager = mapEntry.getKey();
|
||||
final int numOptionalCovariates = keyManager.getNumOptionalCovariates();
|
||||
if (numOptionalCovariates == 0) {
|
||||
final long masterKey = keyManager.createMasterKey(bitKeys, EventType.BASE_SUBSTITUTION, -1);
|
||||
updateCovariateWithKeySet(mapEntry.getValue(), masterKey, newDatum);
|
||||
} else {
|
||||
for (int j = 0; j < numOptionalCovariates; j++) {
|
||||
final long masterKey = keyManager.createMasterKey(bitKeys, EventType.BASE_SUBSTITUTION, j);
|
||||
updateCovariateWithKeySet(mapEntry.getValue(), masterKey, newDatum);
|
||||
}
|
||||
}
|
||||
|
||||
rgTable.put(newDatum, bitKeys[0], EventType.BASE_SUBSTITUTION.index);
|
||||
qualTable.put(newDatum, bitKeys[0], bitKeys[1], EventType.BASE_SUBSTITUTION.index);
|
||||
for (int j = 0; j < optionalCovariates.size(); j++) {
|
||||
covTable.put(newDatum, bitKeys[0], bitKeys[1], j, bitKeys[2 + j], EventType.BASE_SUBSTITUTION.index);
|
||||
}
|
||||
}
|
||||
dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||
|
||||
recalibrationTables = new RecalibrationTables(rgTable, qualTable, covTable);
|
||||
|
||||
dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||
|
||||
List<Byte> quantizedQuals = new ArrayList<Byte>();
|
||||
List<Long> qualCounts = new ArrayList<Long>();
|
||||
|
|
@ -121,16 +112,15 @@ public class BaseRecalibrationUnitTest {
|
|||
}
|
||||
QuantizationInfo quantizationInfo = new QuantizationInfo(quantizedQuals, qualCounts);
|
||||
quantizationInfo.noQuantization();
|
||||
baseRecalibration = new BaseRecalibration(quantizationInfo, keysAndTablesMap, requestedCovariates);
|
||||
baseRecalibration = new BaseRecalibration(quantizationInfo, recalibrationTables, requestedCovariates);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test(enabled=false)
|
||||
public void testGoldStandardComparison() {
|
||||
debugTables();
|
||||
for (int i = 0; i < read.getReadLength(); i++) {
|
||||
long [] bitKey = readCovariates.getKeySet(i, EventType.BASE_SUBSTITUTION);
|
||||
int [] bitKey = readCovariates.getKeySet(i, EventType.BASE_SUBSTITUTION);
|
||||
Object [] objKey = buildObjectKey(bitKey);
|
||||
byte v2 = baseRecalibration.performSequentialQualityCalculation(bitKey, EventType.BASE_SUBSTITUTION);
|
||||
byte v1 = goldStandardSequentialCalculation(objKey);
|
||||
|
|
@ -138,7 +128,7 @@ public class BaseRecalibrationUnitTest {
|
|||
}
|
||||
}
|
||||
|
||||
private Object[] buildObjectKey(long[] bitKey) {
|
||||
private Object[] buildObjectKey(final int[] bitKey) {
|
||||
Object[] key = new Object[bitKey.length];
|
||||
key[0] = rgCovariate.formatKey(bitKey[0]);
|
||||
key[1] = qsCovariate.formatKey(bitKey[1]);
|
||||
|
|
@ -147,49 +137,6 @@ public class BaseRecalibrationUnitTest {
|
|||
return key;
|
||||
}
|
||||
|
||||
private void debugTables() {
|
||||
System.out.println("\nV1 Table\n");
|
||||
System.out.println("ReadGroup Table:");
|
||||
NestedHashMap nestedTable = dataManager.getCollapsedTable(0);
|
||||
printNestedHashMap(nestedTable.data, "");
|
||||
System.out.println("\nQualityScore Table:");
|
||||
nestedTable = dataManager.getCollapsedTable(1);
|
||||
printNestedHashMap(nestedTable.data, "");
|
||||
System.out.println("\nCovariates Table:");
|
||||
nestedTable = dataManager.getCollapsedTable(2);
|
||||
printNestedHashMap(nestedTable.data, "");
|
||||
nestedTable = dataManager.getCollapsedTable(3);
|
||||
printNestedHashMap(nestedTable.data, "");
|
||||
|
||||
|
||||
int i = 0;
|
||||
System.out.println("\nV2 Table\n");
|
||||
for (Map.Entry<BQSRKeyManager, Map<Long, RecalDatum>> mapEntry : keysAndTablesMap.entrySet()) {
|
||||
BQSRKeyManager keyManager = mapEntry.getKey();
|
||||
Map<Long, RecalDatum> table = mapEntry.getValue();
|
||||
switch(i++) {
|
||||
case 0 :
|
||||
System.out.println("ReadGroup Table:");
|
||||
break;
|
||||
case 1 :
|
||||
System.out.println("QualityScore Table:");
|
||||
break;
|
||||
case 2 :
|
||||
System.out.println("Covariates Table:");
|
||||
break;
|
||||
}
|
||||
for (Map.Entry<Long, RecalDatum> entry : table.entrySet()) {
|
||||
Long key = entry.getKey();
|
||||
RecalDatum datum = entry.getValue();
|
||||
List<Object> keySet = keyManager.keySetFrom(key);
|
||||
System.out.println(String.format("%s => %s", Utils.join(",", keySet), datum) + "," + datum.getEstimatedQReported());
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
private static void printNestedHashMap(Map table, String output) {
|
||||
for (Object key : table.keySet()) {
|
||||
String ret;
|
||||
|
|
|
|||
Loading…
Reference in New Issue