Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
b4a7c0d98d
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -141,6 +142,11 @@ public class GATKReport {
|
||||||
tables.put(table.getTableName(), table);
|
tables.put(table.getTableName(), table);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void addTables(List<GATKReportTable> gatkReportTables) {
|
||||||
|
for (GATKReportTable table : gatkReportTables)
|
||||||
|
addTable(table);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return true if table with a given name exists
|
* Return true if table with a given name exists
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -26,17 +26,13 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.Gatherer;
|
import org.broadinstitute.sting.commandline.Gatherer;
|
||||||
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatumOptimized;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* User: carneiro
|
* User: carneiro
|
||||||
|
|
@ -45,80 +41,31 @@ import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
public class BQSRGatherer extends Gatherer {
|
public class BQSRGatherer extends Gatherer {
|
||||||
|
|
||||||
/////////////////////////////
|
private static final String EMPTY_INPUT_LIST = "list of inputs files is empty";
|
||||||
// Private Member Variables
|
private static final String MISSING_OUTPUT_FILE = "missing output file name";
|
||||||
/////////////////////////////
|
|
||||||
private static final String EOF_MARKER = "EOF";
|
|
||||||
|
|
||||||
private HashMap<String, RecalDatumOptimized> dataMap = new HashMap<String, RecalDatumOptimized>();
|
|
||||||
|
|
||||||
|
|
||||||
private void addCSVData (String line) {
|
|
||||||
String[] covariates = line.split(",");
|
|
||||||
String key = "";
|
|
||||||
RecalDatumOptimized values;
|
|
||||||
|
|
||||||
for (int i = 0; i < covariates.length-3; i++)
|
|
||||||
key += covariates[i] + ",";
|
|
||||||
|
|
||||||
if (covariates.length < 3)
|
|
||||||
throw new ReviewedStingException("Line only has 1 covariate : " + line);
|
|
||||||
|
|
||||||
values = new RecalDatumOptimized(Long.parseLong(covariates[covariates.length - 3]), Long.parseLong(covariates[covariates.length - 2]));
|
|
||||||
|
|
||||||
RecalDatumOptimized currentValues = dataMap.get(key);
|
|
||||||
if (currentValues == null)
|
|
||||||
dataMap.put(key, values);
|
|
||||||
else
|
|
||||||
currentValues.increment(values);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void gather(List<File> inputs, File output) {
|
public void gather(List<File> inputs, File output) {
|
||||||
PrintStream o;
|
RecalibrationReport generalReport = null;
|
||||||
|
PrintStream outputFile;
|
||||||
try {
|
try {
|
||||||
o = new PrintStream(output);
|
outputFile = new PrintStream(output);
|
||||||
} catch ( FileNotFoundException e) {
|
} catch(FileNotFoundException e) {
|
||||||
throw new UserException("File to be output by CountCovariates Gather function was not found");
|
throw new UserException.MissingArgument("output", MISSING_OUTPUT_FILE);
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean sawEOF = false;
|
for (File input : inputs) {
|
||||||
boolean printedHeader = false;
|
RecalibrationReport inputReport = new RecalibrationReport(input);
|
||||||
|
if (generalReport == null)
|
||||||
// Read input files
|
generalReport = inputReport;
|
||||||
for ( File RECAL_FILE : inputs) {
|
else
|
||||||
try {
|
generalReport.combine(inputReport);
|
||||||
for ( String line : new XReadLines(RECAL_FILE) ) {
|
|
||||||
if ( EOF_MARKER.equals(line) ) {
|
|
||||||
sawEOF = true; // sanity check
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
else if(line.startsWith("#")) {
|
|
||||||
if (!printedHeader)
|
|
||||||
o.println(line);
|
|
||||||
}
|
|
||||||
|
|
||||||
else // Found a line of data
|
|
||||||
addCSVData(line); // Parse the line and add the data to the HashMap
|
|
||||||
}
|
|
||||||
|
|
||||||
} catch ( FileNotFoundException e ) {
|
|
||||||
throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( !sawEOF ) {
|
|
||||||
final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted!";
|
|
||||||
throw new UserException.MalformedFile(RECAL_FILE, errorMessage);
|
|
||||||
}
|
|
||||||
printedHeader = true;
|
|
||||||
}
|
}
|
||||||
|
if (generalReport == null)
|
||||||
|
throw new ReviewedStingException(EMPTY_INPUT_LIST);
|
||||||
|
|
||||||
// Write output file from dataMap
|
generalReport.calculateEmpiricalAndQuantizedQualities();
|
||||||
for(Map.Entry<String, RecalDatumOptimized> entry : dataMap.entrySet())
|
generalReport.output(outputFile);
|
||||||
o.println(entry.getKey() + entry.getValue().outputToCSV());
|
|
||||||
o.println("EOF");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,17 +25,17 @@ import java.util.*;
|
||||||
* @since 3/6/12
|
* @since 3/6/12
|
||||||
*/
|
*/
|
||||||
public class BQSRKeyManager {
|
public class BQSRKeyManager {
|
||||||
private List<RequiredCovariateInfo> requiredCovariates;
|
private final List<RequiredCovariateInfo> requiredCovariates;
|
||||||
private List<OptionalCovariateInfo> optionalCovariates;
|
private final List<OptionalCovariateInfo> optionalCovariates;
|
||||||
private Map<String, Short> covariateNameToIDMap;
|
private final Map<String, Short> covariateNameToIDMap;
|
||||||
|
|
||||||
private int nRequiredBits; // Number of bits used to represent the required covariates
|
private int nRequiredBits; // Number of bits used to represent the required covariates
|
||||||
private int nOptionalBits; // Number of bits used to represent the standard covaraites
|
private int nOptionalBits; // Number of bits used to represent the standard covaraites
|
||||||
private int nOptionalIDBits; // Number of bits used to represent the optional covariates IDs
|
private final int nOptionalIDBits; // Number of bits used to represent the optional covariates IDs
|
||||||
private int totalNumberOfBits; // Sum of all of the above plus the event bits
|
private final int totalNumberOfBits; // Sum of all of the above plus the event bits
|
||||||
|
|
||||||
private BitSet optionalCovariateMask; // Standard mask for optional covariates bitset
|
private final BitSet optionalCovariateMask; // Standard mask for optional covariates bitset
|
||||||
private BitSet optionalCovariateIDMask; // Standard mask for optional covariates order bitset
|
private final BitSet optionalCovariateIDMask; // Standard mask for optional covariates order bitset
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initializes the KeyManager with the total number of covariates to use
|
* Initializes the KeyManager with the total number of covariates to use
|
||||||
|
|
@ -44,34 +44,34 @@ public class BQSRKeyManager {
|
||||||
* @param optionalCovariates the ordered list of optional covariates
|
* @param optionalCovariates the ordered list of optional covariates
|
||||||
*/
|
*/
|
||||||
public BQSRKeyManager(List<Covariate> requiredCovariates, List<Covariate> optionalCovariates) {
|
public BQSRKeyManager(List<Covariate> requiredCovariates, List<Covariate> optionalCovariates) {
|
||||||
this.requiredCovariates = new ArrayList<RequiredCovariateInfo>(requiredCovariates.size()); // initialize the required covariates list
|
this.requiredCovariates = new ArrayList<RequiredCovariateInfo>(requiredCovariates.size()); // initialize the required covariates list
|
||||||
this.optionalCovariates = new ArrayList<OptionalCovariateInfo>(optionalCovariates.size()); // initialize the optional covariates list (size may be 0, it's okay)
|
this.optionalCovariates = new ArrayList<OptionalCovariateInfo>(optionalCovariates.size()); // initialize the optional covariates list (size may be 0, it's okay)
|
||||||
this.covariateNameToIDMap = new HashMap<String, Short>(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates)
|
this.covariateNameToIDMap = new HashMap<String, Short>(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates)
|
||||||
|
|
||||||
nRequiredBits = 0;
|
nRequiredBits = 0;
|
||||||
for (Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management
|
for (Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management
|
||||||
int nBits = required.numberOfBits(); // number of bits used by this covariate
|
int nBits = required.numberOfBits(); // number of bits used by this covariate
|
||||||
BitSet mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
|
BitSet mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
|
||||||
this.requiredCovariates.add(new RequiredCovariateInfo(nRequiredBits, nBits, mask, required)); // Create an object for this required covariate
|
this.requiredCovariates.add(new RequiredCovariateInfo(nRequiredBits, nBits, mask, required)); // Create an object for this required covariate
|
||||||
nRequiredBits += nBits;
|
nRequiredBits += nBits;
|
||||||
}
|
}
|
||||||
|
|
||||||
short id = 0;
|
short id = 0;
|
||||||
nOptionalBits = 0;
|
nOptionalBits = 0;
|
||||||
for (Covariate optional : optionalCovariates) {
|
for (Covariate optional : optionalCovariates) {
|
||||||
int nBits = optional.numberOfBits(); // number of bits used by this covariate
|
int nBits = optional.numberOfBits(); // number of bits used by this covariate
|
||||||
nOptionalBits = Math.max(nOptionalBits, nBits); // optional covariates are represented by the number of bits needed by biggest covariate
|
nOptionalBits = Math.max(nOptionalBits, nBits); // optional covariates are represented by the number of bits needed by biggest covariate
|
||||||
BitSet optionalID = BitSetUtils.bitSetFrom(id); // calculate the optional covariate ID for this covariate
|
BitSet optionalID = BitSetUtils.bitSetFrom(id); // calculate the optional covariate ID for this covariate
|
||||||
this.optionalCovariates.add(new OptionalCovariateInfo(optionalID, optional)); // optional covariates have standardized mask and number of bits, so no need to store in the RequiredCovariateInfo object
|
this.optionalCovariates.add(new OptionalCovariateInfo(optionalID, optional)); // optional covariates have standardized mask and number of bits, so no need to store in the RequiredCovariateInfo object
|
||||||
String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
|
String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
|
||||||
this.covariateNameToIDMap.put(covariateName, id);
|
this.covariateNameToIDMap.put(covariateName, id);
|
||||||
id++;
|
id++;
|
||||||
}
|
}
|
||||||
|
|
||||||
nOptionalIDBits = BitSetUtils.numberOfBitsToRepresent(optionalCovariates.size()); // number of bits used to represent the covariate ID
|
nOptionalIDBits = BitSetUtils.numberOfBitsToRepresent(optionalCovariates.size()); // number of bits used to represent the covariate ID
|
||||||
optionalCovariateMask = genericMask(nRequiredBits, nOptionalBits); // the generic mask to extract optional covariate bits from the combined bitset
|
optionalCovariateMask = genericMask(nRequiredBits, nOptionalBits); // the generic mask to extract optional covariate bits from the combined bitset
|
||||||
optionalCovariateIDMask = genericMask(nRequiredBits + nOptionalBits, nOptionalIDBits); // the generic mask to extract optional covariate ID bits from the combined bitset
|
optionalCovariateIDMask = genericMask(nRequiredBits + nOptionalBits, nOptionalIDBits); // the generic mask to extract optional covariate ID bits from the combined bitset
|
||||||
totalNumberOfBits = nRequiredBits + nOptionalBits + nOptionalIDBits + bitsInEventType(); // total number of bits used in the final key
|
totalNumberOfBits = nRequiredBits + nOptionalBits + nOptionalIDBits + bitsInEventType(); // total number of bits used in the final key
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -93,32 +93,32 @@ public class BQSRKeyManager {
|
||||||
* @return one key in bitset representation per covariate
|
* @return one key in bitset representation per covariate
|
||||||
*/
|
*/
|
||||||
public List<BitSet> bitSetsFromAllKeys(BitSet[] allKeys, EventType eventType) {
|
public List<BitSet> bitSetsFromAllKeys(BitSet[] allKeys, EventType eventType) {
|
||||||
List<BitSet> allBitSets = new LinkedList<BitSet>(); // Generate one key per optional covariate
|
List<BitSet> allBitSets = new LinkedList<BitSet>(); // Generate one key per optional covariate
|
||||||
|
|
||||||
BitSet eventBitSet = BitSetUtils.bitSetFrom(eventType.index); // create a bitset with the event type
|
BitSet eventBitSet = BitSetUtils.bitSetFrom(eventType.index); // create a bitset with the event type
|
||||||
int eventTypeBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits; // Location in the bit set to add the event type bits
|
int eventTypeBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits; // Location in the bit set to add the event type bits
|
||||||
|
|
||||||
int covariateIndex = 0;
|
int covariateIndex = 0;
|
||||||
BitSet requiredKey = new BitSet(nRequiredBits); // This will be a bitset holding all the required keys, to replicate later on
|
BitSet requiredKey = new BitSet(nRequiredBits); // This will be a bitset holding all the required keys, to replicate later on
|
||||||
for (RequiredCovariateInfo infoRequired : requiredCovariates)
|
for (RequiredCovariateInfo infoRequired : requiredCovariates)
|
||||||
addBitSetToKeyAtLocation(requiredKey, allKeys[covariateIndex++], infoRequired.bitsBefore); // Add all the required covariates to the key set
|
addBitSetToKeyAtLocation(requiredKey, allKeys[covariateIndex++], infoRequired.bitsBefore); // Add all the required covariates to the key set
|
||||||
|
|
||||||
for (OptionalCovariateInfo infoOptional : optionalCovariates) {
|
for (OptionalCovariateInfo infoOptional : optionalCovariates) {
|
||||||
BitSet covariateKey = allKeys[covariateIndex++]; // get the bitset from all keys
|
BitSet covariateKey = allKeys[covariateIndex++]; // get the bitset from all keys
|
||||||
if (covariateKey == null)
|
if (covariateKey == null)
|
||||||
continue; // do not add nulls to the final set of keys.
|
continue; // do not add nulls to the final set of keys.
|
||||||
|
|
||||||
BitSet optionalKey = new BitSet(totalNumberOfBits); // create a new key for this optional covariate
|
BitSet optionalKey = new BitSet(totalNumberOfBits); // create a new key for this optional covariate
|
||||||
optionalKey.or(requiredKey); // import all the required covariates
|
optionalKey.or(requiredKey); // import all the required covariates
|
||||||
addBitSetToKeyAtLocation(optionalKey, covariateKey, nRequiredBits); // add the optional covariate right after the required covariates
|
addBitSetToKeyAtLocation(optionalKey, covariateKey, nRequiredBits); // add the optional covariate right after the required covariates
|
||||||
addBitSetToKeyAtLocation(optionalKey, infoOptional.covariateID, nRequiredBits + nOptionalBits); // add the optional covariate ID right after the optional covarite
|
addBitSetToKeyAtLocation(optionalKey, infoOptional.covariateID, nRequiredBits + nOptionalBits); // add the optional covariate ID right after the optional covarite
|
||||||
addBitSetToKeyAtLocation(optionalKey, eventBitSet, eventTypeBitIndex); // Add the event type
|
addBitSetToKeyAtLocation(optionalKey, eventBitSet, eventTypeBitIndex); // Add the event type
|
||||||
allBitSets.add(optionalKey); // add this key to the list of keys
|
allBitSets.add(optionalKey); // add this key to the list of keys
|
||||||
}
|
}
|
||||||
|
|
||||||
if (optionalCovariates.size() == 0) { // special case when we have no optional covariates, add the event type to the required key (our only key)
|
if (optionalCovariates.size() == 0) { // special case when we have no optional covariates, add the event type to the required key (our only key)
|
||||||
addBitSetToKeyAtLocation(requiredKey, eventBitSet, eventTypeBitIndex); // Add the event type
|
addBitSetToKeyAtLocation(requiredKey, eventBitSet, eventTypeBitIndex); // Add the event type
|
||||||
allBitSets.add(requiredKey); // add this key to the list of keys
|
allBitSets.add(requiredKey); // add this key to the list of keys
|
||||||
}
|
}
|
||||||
|
|
||||||
return allBitSets;
|
return allBitSets;
|
||||||
|
|
@ -141,25 +141,25 @@ public class BQSRKeyManager {
|
||||||
|
|
||||||
int requiredCovariate = 0;
|
int requiredCovariate = 0;
|
||||||
for (RequiredCovariateInfo infoRequired : requiredCovariates) {
|
for (RequiredCovariateInfo infoRequired : requiredCovariates) {
|
||||||
BitSet covariateBitSet = infoRequired.covariate.bitSetFromKey(key[requiredCovariate++]); // create a bitset from the object key provided using the required covariate's interface
|
BitSet covariateBitSet = infoRequired.covariate.bitSetFromKey(key[requiredCovariate++]); // create a bitset from the object key provided using the required covariate's interface
|
||||||
addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, infoRequired.bitsBefore); // add it to the bitset key
|
addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, infoRequired.bitsBefore); // add it to the bitset key
|
||||||
}
|
}
|
||||||
|
|
||||||
if (optionalCovariates.size() > 0) {
|
if (optionalCovariates.size() > 0) {
|
||||||
int optionalCovariate = requiredCovariates.size(); // the optional covariate index in the key array
|
int optionalCovariate = requiredCovariates.size(); // the optional covariate index in the key array
|
||||||
int covariateIDIndex = optionalCovariate + 1; // the optional covariate ID index is right after the optional covariate's
|
int covariateIDIndex = optionalCovariate + 1; // the optional covariate ID index is right after the optional covariate's
|
||||||
int covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index
|
int covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index
|
||||||
OptionalCovariateInfo infoOptional = optionalCovariates.get(covariateID); // so we can get the optional covariate information
|
OptionalCovariateInfo infoOptional = optionalCovariates.get(covariateID); // so we can get the optional covariate information
|
||||||
|
|
||||||
BitSet covariateBitSet = infoOptional.covariate.bitSetFromKey(key[optionalCovariate]); // convert the optional covariate key into a bitset using the covariate's interface
|
BitSet covariateBitSet = infoOptional.covariate.bitSetFromKey(key[optionalCovariate]); // convert the optional covariate key into a bitset using the covariate's interface
|
||||||
addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, nRequiredBits); // add the optional covariate right after the required covariates
|
addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, nRequiredBits); // add the optional covariate right after the required covariates
|
||||||
addBitSetToKeyAtLocation(bitSetKey, infoOptional.covariateID, nRequiredBits + nOptionalBits); // add the optional covariate ID right after the optional covarite
|
addBitSetToKeyAtLocation(bitSetKey, infoOptional.covariateID, nRequiredBits + nOptionalBits); // add the optional covariate ID right after the optional covarite
|
||||||
}
|
}
|
||||||
|
|
||||||
int eventIndex = key.length - 1; // the event type is always the last key
|
int eventIndex = key.length - 1; // the event type is always the last key
|
||||||
int eventTypeBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits; // location in the bit set to add the event type bits
|
int eventTypeBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits; // location in the bit set to add the event type bits
|
||||||
BitSet eventBitSet = bitSetFromEvent((EventType) key[eventIndex]); // get the bit set representation of the event type
|
BitSet eventBitSet = bitSetFromEvent((EventType) key[eventIndex]); // get the bit set representation of the event type
|
||||||
addBitSetToKeyAtLocation(bitSetKey, eventBitSet, eventTypeBitIndex); // add the event type
|
addBitSetToKeyAtLocation(bitSetKey, eventBitSet, eventTypeBitIndex); // add the event type
|
||||||
|
|
||||||
return bitSetKey;
|
return bitSetKey;
|
||||||
}
|
}
|
||||||
|
|
@ -186,19 +186,19 @@ public class BQSRKeyManager {
|
||||||
public List<Object> keySetFrom(BitSet key) {
|
public List<Object> keySetFrom(BitSet key) {
|
||||||
List<Object> objectKeys = new ArrayList<Object>();
|
List<Object> objectKeys = new ArrayList<Object>();
|
||||||
for (RequiredCovariateInfo info : requiredCovariates) {
|
for (RequiredCovariateInfo info : requiredCovariates) {
|
||||||
BitSet covariateBitSet = extractBitSetFromKey(key, info.mask, info.bitsBefore); // get the covariate's bitset
|
BitSet covariateBitSet = extractBitSetFromKey(key, info.mask, info.bitsBefore); // get the covariate's bitset
|
||||||
objectKeys.add(info.covariate.keyFromBitSet(covariateBitSet)); // convert the bitset to object using covariate's interface
|
objectKeys.add(info.covariate.keyFromBitSet(covariateBitSet)); // convert the bitset to object using covariate's interface
|
||||||
}
|
}
|
||||||
|
|
||||||
if (optionalCovariates.size() > 0) {
|
if (optionalCovariates.size() > 0) {
|
||||||
BitSet covBitSet = extractBitSetFromKey(key, optionalCovariateMask, nRequiredBits); // mask out the covariate bit set
|
BitSet covBitSet = extractBitSetFromKey(key, optionalCovariateMask, nRequiredBits); // mask out the covariate bit set
|
||||||
BitSet idbs = extractBitSetFromKey(key, optionalCovariateIDMask, nRequiredBits + nOptionalBits);// mask out the covariate order (to identify which covariate this is)
|
BitSet idbs = extractBitSetFromKey(key, optionalCovariateIDMask, nRequiredBits + nOptionalBits); // mask out the covariate order (to identify which covariate this is)
|
||||||
short id = BitSetUtils.shortFrom(idbs); // covert the id bitset into a short
|
short id = BitSetUtils.shortFrom(idbs); // covert the id bitset into a short
|
||||||
Covariate covariate = optionalCovariates.get(id).covariate; // get the corresponding optional covariate object
|
Covariate covariate = optionalCovariates.get(id).covariate; // get the corresponding optional covariate object
|
||||||
objectKeys.add(covariate.keyFromBitSet(covBitSet)); // add the optional covariate to the key set
|
objectKeys.add(covariate.keyFromBitSet(covBitSet)); // add the optional covariate to the key set
|
||||||
objectKeys.add(covariate.getClass().getSimpleName().split("Covariate")[0]); // add the covariate name using the id
|
objectKeys.add(covariate.getClass().getSimpleName().split("Covariate")[0]); // add the covariate name using the id
|
||||||
}
|
}
|
||||||
objectKeys.add(eventFromBitSet(key)); // add the event type object to the key set
|
objectKeys.add(eventFromBitSet(key)); // add the event type object to the key set
|
||||||
|
|
||||||
return objectKeys;
|
return objectKeys;
|
||||||
}
|
}
|
||||||
|
|
@ -227,7 +227,7 @@ public class BQSRKeyManager {
|
||||||
private BitSet chopNBitsFrom(BitSet key, int n) {
|
private BitSet chopNBitsFrom(BitSet key, int n) {
|
||||||
BitSet choppedKey = new BitSet();
|
BitSet choppedKey = new BitSet();
|
||||||
for (int i = key.nextSetBit(0); i >= 0; i = key.nextSetBit(i + 1))
|
for (int i = key.nextSetBit(0); i >= 0; i = key.nextSetBit(i + 1))
|
||||||
choppedKey.set(i - n); // Set every bit translocated to the beginning of the BitSet
|
choppedKey.set(i - n); // Set every bit translocated to the beginning of the BitSet
|
||||||
return choppedKey;
|
return choppedKey;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -269,7 +269,7 @@ public class BQSRKeyManager {
|
||||||
|
|
||||||
private void addBitSetToKeyAtLocation(BitSet key, BitSet bitSet, int location) {
|
private void addBitSetToKeyAtLocation(BitSet key, BitSet bitSet, int location) {
|
||||||
for (int j = bitSet.nextSetBit(0); j >= 0; j = bitSet.nextSetBit(j + 1))
|
for (int j = bitSet.nextSetBit(0); j >= 0; j = bitSet.nextSetBit(j + 1))
|
||||||
key.set(j + location); // translate the bits set in the key to their corresponding position in the full key
|
key.set(j + location); // translate the bits set in the key to their corresponding position in the full key
|
||||||
}
|
}
|
||||||
|
|
||||||
private BitSet extractBitSetFromKey (BitSet key, BitSet mask, int leadingBits) {
|
private BitSet extractBitSetFromKey (BitSet key, BitSet mask, int leadingBits) {
|
||||||
|
|
@ -282,22 +282,20 @@ public class BQSRKeyManager {
|
||||||
* Aggregate information for each Covariate
|
* Aggregate information for each Covariate
|
||||||
*/
|
*/
|
||||||
class RequiredCovariateInfo {
|
class RequiredCovariateInfo {
|
||||||
public int bitsBefore; // number of bits before this covariate in the combined bitset key
|
public final int bitsBefore; // number of bits before this covariate in the combined bitset key
|
||||||
public int nBits; // number of bits used by this covariate (cached access to covariate.nBits())
|
public final BitSet mask; // the mask to pull out this covariate from the combined bitset key ( a mask made from bitsBefore and nBits )
|
||||||
public BitSet mask; // the mask to pull out this covariate from the combined bitset key ( a mask made from bitsBefore and nBits )
|
public final Covariate covariate; // this allows reverse lookup of the Covariates in order
|
||||||
public Covariate covariate; // this allows reverse lookup of the Covariates in order
|
|
||||||
|
|
||||||
RequiredCovariateInfo(int bitsBefore, int nBits, BitSet mask, Covariate covariate) {
|
RequiredCovariateInfo(int bitsBefore, int nBits, BitSet mask, Covariate covariate) {
|
||||||
this.bitsBefore = bitsBefore;
|
this.bitsBefore = bitsBefore;
|
||||||
this.nBits = nBits;
|
|
||||||
this.mask = mask;
|
this.mask = mask;
|
||||||
this.covariate = covariate;
|
this.covariate = covariate;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class OptionalCovariateInfo {
|
class OptionalCovariateInfo {
|
||||||
public BitSet covariateID; // cache the covariate ID
|
public final BitSet covariateID; // cache the covariate ID
|
||||||
public Covariate covariate;
|
public final Covariate covariate;
|
||||||
|
|
||||||
OptionalCovariateInfo(BitSet covariateID, Covariate covariate) {
|
OptionalCovariateInfo(BitSet covariateID, Covariate covariate) {
|
||||||
this.covariateID = covariateID;
|
this.covariateID = covariateID;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,74 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||||
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.recalibration.QualQuantizer;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.BitSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class that encapsulates the information necessary for quality score quantization for BQSR
|
||||||
|
*
|
||||||
|
* @author carneiro
|
||||||
|
* @since 3/26/12
|
||||||
|
*/
|
||||||
|
public class QuantizationInfo {
|
||||||
|
private List<Byte> quantizedQuals;
|
||||||
|
private List<Long> empiricalQualCounts;
|
||||||
|
|
||||||
|
public QuantizationInfo(List<Byte> quantizedQuals, List<Long> empiricalQualCounts) {
|
||||||
|
this.quantizedQuals = quantizedQuals;
|
||||||
|
this.empiricalQualCounts = empiricalQualCounts;
|
||||||
|
}
|
||||||
|
|
||||||
|
public QuantizationInfo(Map<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap, int nLevels) {
|
||||||
|
final Long [] qualHistogram = new Long[QualityUtils.MAX_QUAL_SCORE+1]; // create a histogram with the empirical quality distribution
|
||||||
|
for (int i = 0; i < qualHistogram.length; i++)
|
||||||
|
qualHistogram[i] = 0L;
|
||||||
|
|
||||||
|
Map<BitSet, RecalDatum> qualTable = null; // look for the quality score table
|
||||||
|
for (Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
|
||||||
|
BQSRKeyManager keyManager = entry.getKey();
|
||||||
|
if (keyManager.getRequiredCovariates().size() == 2) // it should be the only one with 2 required covaraites
|
||||||
|
qualTable = entry.getValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (qualTable == null)
|
||||||
|
throw new ReviewedStingException("Could not find QualityScore table.");
|
||||||
|
|
||||||
|
for (RecalDatum datum : qualTable.values()) {
|
||||||
|
int empiricalQual = (int) Math.round(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL )
|
||||||
|
long nObservations = datum.numObservations;
|
||||||
|
qualHistogram[empiricalQual] += nObservations; // add the number of observations for every key
|
||||||
|
}
|
||||||
|
empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities
|
||||||
|
quantizeQualityScores(nLevels);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void quantizeQualityScores(int nLevels) {
|
||||||
|
QualQuantizer quantizer = new QualQuantizer(empiricalQualCounts, nLevels, QualityUtils.MIN_USABLE_Q_SCORE); // quantize the qualities to the desired number of levels
|
||||||
|
quantizedQuals = quantizer.getOriginalToQuantizedMap(); // map with the original to quantized qual map (using the standard number of levels in the RAC)
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Byte> getQuantizedQuals() {
|
||||||
|
return quantizedQuals;
|
||||||
|
}
|
||||||
|
|
||||||
|
public GATKReportTable generateReportTable() {
|
||||||
|
GATKReportTable quantizedTable = new GATKReportTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map");
|
||||||
|
quantizedTable.addPrimaryKey(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
|
||||||
|
quantizedTable.addColumn(RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME, 0L);
|
||||||
|
quantizedTable.addColumn(RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME, (byte) 0);
|
||||||
|
|
||||||
|
for (int qual = 0; qual <= QualityUtils.MAX_QUAL_SCORE; qual++) {
|
||||||
|
quantizedTable.set(qual, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual));
|
||||||
|
quantizedTable.set(qual, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual));
|
||||||
|
}
|
||||||
|
return quantizedTable;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -28,7 +28,8 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
import net.sf.samtools.SAMUtils;
|
import net.sf.samtools.SAMUtils;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.walkers.recalibration.EmpiricalQual;
|
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||||
|
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
@ -42,6 +43,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
||||||
|
import java.io.PrintStream;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -71,13 +73,14 @@ public class RecalDataManager {
|
||||||
public final static String QUALITY_SCORE_COLUMN_NAME = "QualityScore";
|
public final static String QUALITY_SCORE_COLUMN_NAME = "QualityScore";
|
||||||
public final static String COVARIATE_VALUE_COLUMN_NAME = "CovariateValue";
|
public final static String COVARIATE_VALUE_COLUMN_NAME = "CovariateValue";
|
||||||
public final static String COVARIATE_NAME_COLUMN_NAME = "CovariateName";
|
public final static String COVARIATE_NAME_COLUMN_NAME = "CovariateName";
|
||||||
|
public final static String NUMBER_OBSERVATIONS_COLUMN_NAME = "Observations";
|
||||||
|
public final static String NUMBER_ERRORS_COLUMN_NAME = "Errors";
|
||||||
|
|
||||||
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
|
private final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
|
||||||
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
|
private final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
|
||||||
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
|
private final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
|
||||||
private static boolean warnUserNullPlatform = false;
|
private static boolean warnUserNullPlatform = false;
|
||||||
|
|
||||||
|
|
||||||
public enum SOLID_RECAL_MODE {
|
public enum SOLID_RECAL_MODE {
|
||||||
/**
|
/**
|
||||||
* Treat reference inserted bases as reference matching bases. Very unsafe!
|
* Treat reference inserted bases as reference matching bases. Very unsafe!
|
||||||
|
|
@ -136,25 +139,38 @@ public class RecalDataManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void listAvailableCovariates(Logger logger) {
|
|
||||||
// Get a list of all available covariates
|
|
||||||
final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
|
|
||||||
|
|
||||||
// Print and exit if that's what was requested
|
/**
|
||||||
logger.info("Available covariates:");
|
* Initializes the recalibration table -> key manager map
|
||||||
for (Class<?> covClass : covariateClasses)
|
*
|
||||||
logger.info(covClass.getSimpleName());
|
* @param requiredCovariates list of required covariates (in order)
|
||||||
logger.info("");
|
* @param optionalCovariates list of optional covariates (in order)
|
||||||
|
* @return a map with each key manager and it's corresponding recalibration table properly initialized
|
||||||
|
*/
|
||||||
|
public static LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> initializeTables(ArrayList<Covariate> requiredCovariates, ArrayList<Covariate> optionalCovariates) {
|
||||||
|
final LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> tablesAndKeysMap = new LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>>();
|
||||||
|
ArrayList<Covariate> requiredCovariatesToAdd = new ArrayList<Covariate>(requiredCovariates.size() + 1); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates.
|
||||||
|
ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
|
||||||
|
for (Covariate covariate : requiredCovariates) {
|
||||||
|
requiredCovariatesToAdd.add(covariate);
|
||||||
|
final Map<BitSet, RecalDatum> recalTable = new HashMap<BitSet, RecalDatum>(QualityUtils.MAX_QUAL_SCORE); // initializing a new recal table for each required covariate (cumulatively)
|
||||||
|
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
|
||||||
|
tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
|
||||||
|
}
|
||||||
|
final Map<BitSet, RecalDatum> recalTable = new HashMap<BitSet, RecalDatum>(Short.MAX_VALUE); // initializing a new recal table to hold all optional covariates
|
||||||
|
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager
|
||||||
|
tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
|
||||||
|
return tablesAndKeysMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates two lists : required covariates and optional covariates based on the user's requests.
|
* Generates two lists : required covariates and optional covariates based on the user's requests.
|
||||||
*
|
*
|
||||||
* Performs the following tasks in order:
|
* Performs the following tasks in order:
|
||||||
* 1. Adds all requierd covariates in order
|
* 1. Adds all requierd covariates in order
|
||||||
* 2. Check if the user asked to use the standard covariates and adds them all if that's the case
|
* 2. Check if the user asked to use the standard covariates and adds them all if that's the case
|
||||||
* 3. Adds all covariates requested by the user that were not already added by the two previous steps
|
* 3. Adds all covariates requested by the user that were not already added by the two previous steps
|
||||||
*
|
*
|
||||||
* @param argumentCollection the argument collection object for the recalibration walker
|
* @param argumentCollection the argument collection object for the recalibration walker
|
||||||
* @return a pair of ordered lists : required covariates (first) and optional covariates (second)
|
* @return a pair of ordered lists : required covariates (first) and optional covariates (second)
|
||||||
*/
|
*/
|
||||||
|
|
@ -194,52 +210,102 @@ public class RecalDataManager {
|
||||||
return new Pair<ArrayList<Covariate>, ArrayList<Covariate>>(requiredCovariates, optionalCovariates);
|
return new Pair<ArrayList<Covariate>, ArrayList<Covariate>>(requiredCovariates, optionalCovariates);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
public static void listAvailableCovariates(Logger logger) {
|
||||||
* Initializes the recalibration table -> key manager map
|
// Get a list of all available covariates
|
||||||
*
|
final List<Class<? extends Covariate>> covariateClasses = new PluginManager<Covariate>(Covariate.class).getPlugins();
|
||||||
* @param requiredCovariates list of required covariates (in order)
|
|
||||||
* @param optionalCovariates list of optional covariates (in order)
|
// Print and exit if that's what was requested
|
||||||
* @return a map with each key manager and it's corresponding recalibration table properly initialized
|
logger.info("Available covariates:");
|
||||||
*/
|
for (Class<?> covClass : covariateClasses)
|
||||||
public static LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> initializeTables(ArrayList<Covariate> requiredCovariates, ArrayList<Covariate> optionalCovariates) {
|
logger.info(covClass.getSimpleName());
|
||||||
final LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> tablesAndKeysMap = new LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>>();
|
logger.info("");
|
||||||
ArrayList<Covariate> requiredCovariatesToAdd = new ArrayList<Covariate>(requiredCovariates.size() + 1); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates.
|
|
||||||
ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
|
|
||||||
for (Covariate covariate : requiredCovariates) {
|
|
||||||
requiredCovariatesToAdd.add(covariate);
|
|
||||||
final Map<BitSet, RecalDatum> recalTable = new HashMap<BitSet, RecalDatum>(QualityUtils.MAX_QUAL_SCORE); // initializing a new recal table for each required covariate (cumulatively)
|
|
||||||
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
|
|
||||||
tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
|
|
||||||
}
|
|
||||||
final Map<BitSet, RecalDatum> recalTable = new HashMap<BitSet, RecalDatum>(Short.MAX_VALUE); // initializing a new recal table to hold all optional covariates
|
|
||||||
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager
|
|
||||||
tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
|
|
||||||
return tablesAndKeysMap;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
public static List<GATKReportTable> generateReportTables(Map<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap) {
|
||||||
* Initializes the table -> key manager map (unfortunate copy of the above code with minor modifications to accomodate the different return types (RecalDatum vs EmpiricalQual objects)
|
List<GATKReportTable> result = new LinkedList<GATKReportTable>();
|
||||||
*
|
int tableIndex = 0;
|
||||||
* @param requiredCovariates list of required covariates (in order)
|
for (Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
|
||||||
* @param optionalCovariates list of optional covariates (in order)
|
BQSRKeyManager keyManager = entry.getKey();
|
||||||
* @return a map with each key manager and it's corresponding recalibration table properly initialized
|
Map<BitSet, RecalDatum> recalTable = entry.getValue();
|
||||||
*/
|
|
||||||
public static LinkedHashMap<BQSRKeyManager, Map<BitSet, EmpiricalQual>> initializeEmpiricalTables(ArrayList<Covariate> requiredCovariates, ArrayList<Covariate> optionalCovariates) {
|
GATKReportTable reportTable = new GATKReportTable("RecalTable" + tableIndex++, "");
|
||||||
final LinkedHashMap<BQSRKeyManager, Map<BitSet, EmpiricalQual>> tablesAndKeysMap = new LinkedHashMap<BQSRKeyManager, Map<BitSet, EmpiricalQual>>();
|
final Pair<String, String> covariateValue = new Pair<String, String>(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME, "%s");
|
||||||
ArrayList<Covariate> requiredCovariatesToAdd = new ArrayList<Covariate>(requiredCovariates.size() + 1); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates.
|
final Pair<String, String> covariateName = new Pair<String, String>(RecalDataManager.COVARIATE_NAME_COLUMN_NAME, "%s");
|
||||||
ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
|
final Pair<String, String> eventType = new Pair<String, String>(RecalDataManager.EVENT_TYPE_COLUMN_NAME, "%s");
|
||||||
for (Covariate covariate : requiredCovariates) {
|
final Pair<String, String> empiricalQuality = new Pair<String, String>(RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME, "%.2f");
|
||||||
requiredCovariatesToAdd.add(covariate);
|
final Pair<String, String> estimatedQReported = new Pair<String, String>(RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.2f");
|
||||||
final Map<BitSet, EmpiricalQual> recalTable = new HashMap<BitSet, EmpiricalQual>(QualityUtils.MAX_QUAL_SCORE); // initializing a new recal table for each required covariate (cumulatively)
|
final Pair<String, String> nObservations = new Pair<String, String>(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d");
|
||||||
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
|
final Pair<String, String> nErrors = new Pair<String, String>(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME, "%d");
|
||||||
tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
|
|
||||||
|
long primaryKey = 0L;
|
||||||
|
|
||||||
|
List<Covariate> requiredList = keyManager.getRequiredCovariates(); // ask the key manager what required covariates were used in this recal table
|
||||||
|
List<Covariate> optionalList = keyManager.getOptionalCovariates(); // ask the key manager what optional covariates were used in this recal table
|
||||||
|
|
||||||
|
ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
|
||||||
|
|
||||||
|
for (Covariate covariate : requiredList) {
|
||||||
|
String name = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the covariate names and put them in order
|
||||||
|
columnNames.add(new Pair<String,String>(name, "%s")); // save the required covariate name so we can reference it in the future
|
||||||
|
}
|
||||||
|
|
||||||
|
if (optionalList.size() > 0) {
|
||||||
|
columnNames.add(covariateValue);
|
||||||
|
columnNames.add(covariateName);
|
||||||
|
}
|
||||||
|
|
||||||
|
columnNames.add(eventType); // the order of these column names is important here
|
||||||
|
columnNames.add(empiricalQuality);
|
||||||
|
columnNames.add(estimatedQReported);
|
||||||
|
columnNames.add(nObservations);
|
||||||
|
columnNames.add(nErrors);
|
||||||
|
|
||||||
|
|
||||||
|
reportTable.addPrimaryKey("PrimaryKey", false); // every table must have a primary key (hidden)
|
||||||
|
for (Pair<String, String> columnName : columnNames)
|
||||||
|
reportTable.addColumn(columnName.getFirst(), true, columnName.getSecond()); // every table must have the event type
|
||||||
|
|
||||||
|
for (Map.Entry<BitSet, RecalDatum> recalTableEntry : recalTable.entrySet()) { // create a map with column name => key value for all covariate keys
|
||||||
|
BitSet bitSetKey = recalTableEntry.getKey();
|
||||||
|
Map<String, Object> columnData = new HashMap<String, Object>(columnNames.size());
|
||||||
|
Iterator<Pair<String, String>> iterator = columnNames.iterator();
|
||||||
|
for (Object key : keyManager.keySetFrom(bitSetKey)) {
|
||||||
|
String columnName = iterator.next().getFirst();
|
||||||
|
columnData.put(columnName, key);
|
||||||
|
}
|
||||||
|
RecalDatum datum = recalTableEntry.getValue();
|
||||||
|
columnData.put(iterator.next().getFirst(), datum.getEmpiricalQuality()); // iterator.next() gives the column name for Empirical Quality
|
||||||
|
columnData.put(iterator.next().getFirst(), Math.round(datum.getEstimatedQReported())); // iterator.next() gives the column name for EstimatedQReported
|
||||||
|
columnData.put(iterator.next().getFirst(), datum.numObservations);
|
||||||
|
columnData.put(iterator.next().getFirst(), datum.numMismatches);
|
||||||
|
|
||||||
|
for (Map.Entry<String, Object> dataEntry : columnData.entrySet()) {
|
||||||
|
String columnName = dataEntry.getKey();
|
||||||
|
Object value = dataEntry.getValue();
|
||||||
|
reportTable.set(primaryKey, columnName, value.toString());
|
||||||
|
}
|
||||||
|
primaryKey++;
|
||||||
|
}
|
||||||
|
result.add(reportTable);
|
||||||
}
|
}
|
||||||
final Map<BitSet, EmpiricalQual> recalTable = new HashMap<BitSet, EmpiricalQual>(Short.MAX_VALUE); // initializing a new recal table to hold all optional covariates
|
return result;
|
||||||
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager
|
|
||||||
tablesAndKeysMap.put(keyManager, recalTable); // adding the pair table+key to the map
|
|
||||||
return tablesAndKeysMap;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static void outputRecalibrationReport(RecalibrationArgumentCollection RAC, QuantizationInfo quantizationInfo, Map<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap, PrintStream outputFile) {
|
||||||
|
outputRecalibrationReport(RAC.generateReportTable(), quantizationInfo.generateReportTable(), generateReportTables(keysAndTablesMap), outputFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void outputRecalibrationReport(GATKReportTable argumentTable, QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager,Map<BitSet, RecalDatum>> keysAndTablesMap, PrintStream outputFile) {
|
||||||
|
outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(keysAndTablesMap), outputFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void outputRecalibrationReport(GATKReportTable argumentTable, GATKReportTable quantizationTable, List<GATKReportTable> recalTables, PrintStream outputFile) {
|
||||||
|
GATKReport report = new GATKReport();
|
||||||
|
report.addTable(argumentTable);
|
||||||
|
report.addTable(quantizationTable);
|
||||||
|
report.addTables(recalTables);
|
||||||
|
report.print(outputFile);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Section of code shared between the two recalibration walkers which uses the command line arguments to adjust attributes of the read such as quals or platform string
|
* Section of code shared between the two recalibration walkers which uses the command line arguments to adjust attributes of the read such as quals or platform string
|
||||||
|
|
|
||||||
|
|
@ -38,6 +38,8 @@ public class RecalDatum extends RecalDatumOptimized {
|
||||||
private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations
|
private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations
|
||||||
private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
|
private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
|
||||||
|
|
||||||
|
private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// constructors
|
// constructors
|
||||||
|
|
@ -75,7 +77,6 @@ public class RecalDatum extends RecalDatumOptimized {
|
||||||
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
|
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
|
||||||
this.increment(other.numObservations, other.numMismatches);
|
this.increment(other.numObservations, other.numMismatches);
|
||||||
this.estimatedQReported = -10 * Math.log10(sumErrors / (double) this.numObservations);
|
this.estimatedQReported = -10 * Math.log10(sumErrors / (double) this.numObservations);
|
||||||
//if( this.estimatedQReported > QualityUtils.MAX_REASONABLE_Q_SCORE ) { this.estimatedQReported = QualityUtils.MAX_REASONABLE_Q_SCORE; }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
|
|
@ -84,8 +85,8 @@ public class RecalDatum extends RecalDatumOptimized {
|
||||||
//
|
//
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
public final void calcCombinedEmpiricalQuality(final int smoothing, final int maxQual) {
|
public final void calcCombinedEmpiricalQuality(final int maxQual) {
|
||||||
this.empiricalQuality = empiricalQualDouble(smoothing, maxQual); // cache the value so we don't call log over and over again
|
this.empiricalQuality = empiricalQualDouble(SMOOTHING_CONSTANT, maxQual); // cache the value so we don't call log over and over again
|
||||||
}
|
}
|
||||||
|
|
||||||
public final void calcEstimatedReportedQuality() {
|
public final void calcEstimatedReportedQuality() {
|
||||||
|
|
@ -106,6 +107,11 @@ public class RecalDatum extends RecalDatumOptimized {
|
||||||
return empiricalQuality;
|
return empiricalQuality;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public final void resetCalculatedQualities() {
|
||||||
|
empiricalQuality = 0.0;
|
||||||
|
estimatedQReported = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
private double calcExpectedErrors() {
|
private double calcExpectedErrors() {
|
||||||
return (double) this.numObservations * qualToErrorProb(estimatedQReported);
|
return (double) this.numObservations * qualToErrorProb(estimatedQReported);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,8 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
|
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broadinstitute.sting.commandline.*;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
|
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
|
@ -156,5 +158,25 @@ public class RecalibrationArgumentCollection {
|
||||||
@Argument(fullName = "quantizing_levels", shortName = "ql", required = false, doc = "number of distinct quality scores in the quantized output")
|
@Argument(fullName = "quantizing_levels", shortName = "ql", required = false, doc = "number of distinct quality scores in the quantized output")
|
||||||
public int QUANTIZING_LEVELS = 16;
|
public int QUANTIZING_LEVELS = 16;
|
||||||
|
|
||||||
|
public GATKReportTable generateReportTable() {
|
||||||
|
GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run");
|
||||||
|
argumentsTable.addPrimaryKey("Argument");
|
||||||
|
argumentsTable.addColumn(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, "null");
|
||||||
|
argumentsTable.set("covariate", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES));
|
||||||
|
argumentsTable.set("standard_covs", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, USE_STANDARD_COVARIATES);
|
||||||
|
argumentsTable.set("run_without_dbsnp", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
|
||||||
|
argumentsTable.set("solid_recal_mode", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
|
||||||
|
argumentsTable.set("solid_nocall_strategy", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
|
||||||
|
argumentsTable.set("mismatches_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
|
||||||
|
argumentsTable.set("insertions_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_CONTEXT_SIZE);
|
||||||
|
argumentsTable.set("deletions_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DELETIONS_CONTEXT_SIZE);
|
||||||
|
argumentsTable.set("mismatches_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
|
||||||
|
argumentsTable.set("insertions_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
|
||||||
|
argumentsTable.set("low_quality_tail", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
|
||||||
|
argumentsTable.set("default_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
|
||||||
|
argumentsTable.set("force_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
|
||||||
|
argumentsTable.set("quantizing_levels", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
|
||||||
|
return argumentsTable;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,290 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||||
|
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||||
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.PrintStream;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class has all the static functionality for reading a recalibration report file into memory.
|
||||||
|
*
|
||||||
|
* @author carneiro
|
||||||
|
* @since 3/26/12
|
||||||
|
*/
|
||||||
|
public class RecalibrationReport {
|
||||||
|
private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done)
|
||||||
|
private LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
|
||||||
|
private ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation
|
||||||
|
|
||||||
|
GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
|
||||||
|
RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter | todo -- this should be a new parameter, not necessarily coming from the original table parameter list
|
||||||
|
|
||||||
|
private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code";
|
||||||
|
|
||||||
|
public RecalibrationReport(final File RECAL_FILE) {
|
||||||
|
GATKReport report = new GATKReport(RECAL_FILE);
|
||||||
|
|
||||||
|
argumentTable = report.getTable(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE);
|
||||||
|
RAC = initializeArgumentCollectionTable(argumentTable);
|
||||||
|
|
||||||
|
GATKReportTable quantizedTable = report.getTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE);
|
||||||
|
quantizationInfo = initializeQuantizationTable(quantizedTable);
|
||||||
|
|
||||||
|
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates
|
||||||
|
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
|
||||||
|
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
|
||||||
|
requestedCovariates.addAll(requiredCovariates); // add all required covariates to the list of requested covariates
|
||||||
|
requestedCovariates.addAll(optionalCovariates); // add all optional covariates to the list of requested covariates
|
||||||
|
|
||||||
|
for (Covariate cov : requestedCovariates)
|
||||||
|
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
|
||||||
|
|
||||||
|
keysAndTablesMap = new LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>>();
|
||||||
|
ArrayList<Covariate> requiredCovariatesToAdd = new ArrayList<Covariate>(requiredCovariates.size()); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates.
|
||||||
|
ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
|
||||||
|
for (Covariate covariate : requiredCovariates) {
|
||||||
|
requiredCovariatesToAdd.add(covariate);
|
||||||
|
final Map<BitSet, RecalDatum> table; // initializing a new recal table for each required covariate (cumulatively)
|
||||||
|
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
|
||||||
|
|
||||||
|
int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES)
|
||||||
|
if (nRequiredCovariates == 1) { // if there is only one required covariate, this is the read group table
|
||||||
|
final GATKReportTable reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE);
|
||||||
|
table = parseReadGroupTable(keyManager, reportTable);
|
||||||
|
}
|
||||||
|
else if (nRequiredCovariates == 2 && optionalCovariatesToAdd.isEmpty()) { // when we have both required covariates and no optional covariates we're at the QUAL table
|
||||||
|
final GATKReportTable reportTable = report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE);
|
||||||
|
table = parseQualityScoreTable(keyManager, reportTable);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
throw new ReviewedStingException(UNRECOGNIZED_REPORT_TABLE_EXCEPTION);
|
||||||
|
|
||||||
|
keysAndTablesMap.put(keyManager, table); // adding the pair key+table to the map
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager
|
||||||
|
final GATKReportTable reportTable = report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE);
|
||||||
|
final Map<BitSet, RecalDatum> table = parseAllCovariatesTable(keyManager, reportTable);
|
||||||
|
keysAndTablesMap.put(keyManager, table);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Combines two recalibration reports by adding all observations and errors
|
||||||
|
*
|
||||||
|
* Note: This method DOES NOT recalculate the empirical qualities and quantized qualities. You have to recalculate them
|
||||||
|
* after combining. The reason for not calculating it is because this function is inteded for combining a series of
|
||||||
|
* recalibration reports, and it only makes sense to calculate the empirical qualities and quantized qualities after all
|
||||||
|
* the recalibration reports have been combined. Having the user recalculate when appropriate, makes this method faster
|
||||||
|
*
|
||||||
|
* Note2: The empirical quality reported, however, is recalculated given its simplicity.
|
||||||
|
*
|
||||||
|
* @param other the recalibration report to combine with this one
|
||||||
|
*/
|
||||||
|
public void combine(RecalibrationReport other) {
|
||||||
|
Iterator<Map<BitSet, RecalDatum>> tableIterator = keysAndTablesMap.values().iterator(); // because these are ordered (linked hashmaps) we can iterate over the 'this' and do a for loop on the 'other' tables and be sure that we are looking at the equivalent tables on both objects
|
||||||
|
for (Map<BitSet, RecalDatum> otherTable : other.getKeysAndTablesMap().values()) { // iterate over all tables for 'other'
|
||||||
|
Map<BitSet, RecalDatum> thisTable = tableIterator.next(); // iterate over all tables for 'this'
|
||||||
|
for (Map.Entry<BitSet, RecalDatum> entry : otherTable.entrySet()) { // for each table, go through all the entries in the 'other' dataset to update 'this' dataset
|
||||||
|
BitSet key = entry.getKey();
|
||||||
|
RecalDatum otherDatum = entry.getValue();
|
||||||
|
RecalDatum thisDatum = thisTable.get(key);
|
||||||
|
thisDatum.increment(otherDatum); // add the two datum objects into 'this'
|
||||||
|
thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public QuantizationInfo getQuantizationInfo() {
|
||||||
|
return quantizationInfo;
|
||||||
|
}
|
||||||
|
|
||||||
|
public LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> getKeysAndTablesMap() {
|
||||||
|
return keysAndTablesMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ArrayList<Covariate> getRequestedCovariates() {
|
||||||
|
return requestedCovariates;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compiles the list of keys for the Covariates table and uses the shared parsing utility to produce the actual table
|
||||||
|
*
|
||||||
|
* @param keyManager the key manager for this table
|
||||||
|
* @param reportTable the GATKReport table containing data for this table
|
||||||
|
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
||||||
|
*/
|
||||||
|
private Map<BitSet, RecalDatum> parseAllCovariatesTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
||||||
|
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(5);
|
||||||
|
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
|
||||||
|
columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
|
||||||
|
columnNamesOrderedList.add(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME);
|
||||||
|
columnNamesOrderedList.add(RecalDataManager.COVARIATE_NAME_COLUMN_NAME);
|
||||||
|
columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME);
|
||||||
|
return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* Compiles the list of keys for the QualityScore table and uses the shared parsing utility to produce the actual table
|
||||||
|
* @param keyManager the key manager for this table
|
||||||
|
* @param reportTable the GATKReport table containing data for this table
|
||||||
|
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
||||||
|
*/
|
||||||
|
private Map<BitSet, RecalDatum> parseQualityScoreTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
||||||
|
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(3);
|
||||||
|
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
|
||||||
|
columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
|
||||||
|
columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME);
|
||||||
|
return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compiles the list of keys for the ReadGroup table and uses the shared parsing utility to produce the actual table
|
||||||
|
*
|
||||||
|
* @param keyManager the key manager for this table
|
||||||
|
* @param reportTable the GATKReport table containing data for this table
|
||||||
|
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
||||||
|
*/
|
||||||
|
private Map<BitSet, RecalDatum> parseReadGroupTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
||||||
|
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(2);
|
||||||
|
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
|
||||||
|
columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME);
|
||||||
|
return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shared parsing functionality for all tables.
|
||||||
|
*
|
||||||
|
* @param keyManager the key manager for this table
|
||||||
|
* @param reportTable the GATKReport table containing data for this table
|
||||||
|
* @param columnNamesOrderedList a list of columns to read from the report table and build as key for this particular table
|
||||||
|
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
||||||
|
*/
|
||||||
|
private Map<BitSet, RecalDatum> genericRecalTableParsing(BQSRKeyManager keyManager, GATKReportTable reportTable, ArrayList<String> columnNamesOrderedList) {
|
||||||
|
Map<BitSet, RecalDatum> result = new HashMap<BitSet, RecalDatum>(reportTable.getNumRows()*2);
|
||||||
|
|
||||||
|
for (Object primaryKey : reportTable.getPrimaryKeys()) {
|
||||||
|
int nKeys = columnNamesOrderedList.size();
|
||||||
|
Object [] keySet = new Object[nKeys];
|
||||||
|
for (int i = 0; i < nKeys; i++)
|
||||||
|
keySet[i] = reportTable.get(primaryKey, columnNamesOrderedList.get(i)); // all these objects are okay in String format, the key manager will handle them correctly (except for the event type (see below)
|
||||||
|
keySet[keySet.length-1] = EventType.eventFrom((String) keySet[keySet.length-1]); // the last key is always the event type. We convert the string ("M", "I" or "D") to an enum object (necessary for the key manager).
|
||||||
|
BitSet bitKey = keyManager.bitSetFromKey(keySet);
|
||||||
|
|
||||||
|
long nObservations = (Long) reportTable.get(primaryKey, RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
|
||||||
|
long nErrors = (Long) reportTable.get(primaryKey, RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
|
||||||
|
double estimatedQReported = (Double) reportTable.get(primaryKey, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME);
|
||||||
|
double empiricalQuality = (Double) reportTable.get(primaryKey, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME);
|
||||||
|
RecalDatum recalDatum = new RecalDatum(nObservations, nErrors, estimatedQReported, empiricalQuality);
|
||||||
|
|
||||||
|
result.put(bitKey, recalDatum);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parses the quantization table from the GATK Report and turns it into a map of original => quantized quality scores
|
||||||
|
*
|
||||||
|
* @param table the GATKReportTable containing the quantization mappings
|
||||||
|
* @return an ArrayList with the quantization mappings from 0 to MAX_QUAL_SCORE
|
||||||
|
*/
|
||||||
|
private QuantizationInfo initializeQuantizationTable(GATKReportTable table) {
|
||||||
|
Byte[] quals = new Byte[QualityUtils.MAX_QUAL_SCORE + 1];
|
||||||
|
Long[] counts = new Long[QualityUtils.MAX_QUAL_SCORE + 1];
|
||||||
|
for (Object primaryKey : table.getPrimaryKeys()) {
|
||||||
|
Object quantizedObject = table.get(primaryKey, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME);
|
||||||
|
Object countObject = table.get(primaryKey, RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
|
||||||
|
byte originalQual = Byte.parseByte(primaryKey.toString());
|
||||||
|
byte quantizedQual = Byte.parseByte(quantizedObject.toString());
|
||||||
|
long quantizedCount = Long.parseLong(countObject.toString());
|
||||||
|
quals[originalQual] = quantizedQual;
|
||||||
|
counts[originalQual] = quantizedCount;
|
||||||
|
}
|
||||||
|
return new QuantizationInfo(Arrays.asList(quals), Arrays.asList(counts));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parses the arguments table from the GATK Report and creates a RAC object with the proper initialization values
|
||||||
|
*
|
||||||
|
* @param table the GATKReportTable containing the arguments and its corresponding values
|
||||||
|
* @return a RAC object properly initialized with all the objects in the table
|
||||||
|
*/
|
||||||
|
private RecalibrationArgumentCollection initializeArgumentCollectionTable(GATKReportTable table) {
|
||||||
|
RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||||
|
|
||||||
|
for (Object primaryKey : table.getPrimaryKeys()) {
|
||||||
|
Object value = table.get(primaryKey, RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
|
||||||
|
if (value.equals("null"))
|
||||||
|
value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport
|
||||||
|
|
||||||
|
if (primaryKey.equals("covariate") && value != null)
|
||||||
|
RAC.COVARIATES = value.toString().split(",");
|
||||||
|
|
||||||
|
else if (primaryKey.equals("standard_covs"))
|
||||||
|
RAC.USE_STANDARD_COVARIATES = Boolean.parseBoolean((String) value);
|
||||||
|
|
||||||
|
else if (primaryKey.equals("solid_recal_mode"))
|
||||||
|
RAC.SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.recalModeFromString((String) value);
|
||||||
|
|
||||||
|
else if (primaryKey.equals("solid_nocall_strategy"))
|
||||||
|
RAC.SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value);
|
||||||
|
|
||||||
|
else if (primaryKey.equals("mismatches_context_size"))
|
||||||
|
RAC.MISMATCHES_CONTEXT_SIZE = Integer.parseInt((String) value);
|
||||||
|
|
||||||
|
else if (primaryKey.equals("insertions_context_size"))
|
||||||
|
RAC.INSERTIONS_CONTEXT_SIZE = Integer.parseInt((String) value);
|
||||||
|
|
||||||
|
else if (primaryKey.equals("deletions_context_size"))
|
||||||
|
RAC.DELETIONS_CONTEXT_SIZE = Integer.parseInt((String) value);
|
||||||
|
|
||||||
|
else if (primaryKey.equals("mismatches_default_quality"))
|
||||||
|
RAC.MISMATCHES_DEFAULT_QUALITY = Byte.parseByte((String) value);
|
||||||
|
|
||||||
|
else if (primaryKey.equals("insertions_default_quality"))
|
||||||
|
RAC.INSERTIONS_DEFAULT_QUALITY = Byte.parseByte((String) value);
|
||||||
|
|
||||||
|
else if (primaryKey.equals("deletions_default_quality"))
|
||||||
|
RAC.DELETIONS_DEFAULT_QUALITY = Byte.parseByte((String) value);
|
||||||
|
|
||||||
|
else if (primaryKey.equals("low_quality_tail"))
|
||||||
|
RAC.LOW_QUAL_TAIL = Byte.parseByte((String) value);
|
||||||
|
|
||||||
|
else if (primaryKey.equals("default_platform"))
|
||||||
|
RAC.DEFAULT_PLATFORM = (String) value;
|
||||||
|
|
||||||
|
else if (primaryKey.equals("force_platform"))
|
||||||
|
RAC.FORCE_PLATFORM = (String) value;
|
||||||
|
|
||||||
|
else if (primaryKey.equals("quantizing_levels"))
|
||||||
|
RAC.QUANTIZING_LEVELS = Integer.parseInt((String) value);
|
||||||
|
}
|
||||||
|
|
||||||
|
return RAC;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* this functionality avoids recalculating the empirical qualities, estimated reported quality
|
||||||
|
* and quantization of the quality scores during every call of combine(). Very useful for the BQSRGatherer.
|
||||||
|
*/
|
||||||
|
public void calculateEmpiricalAndQuantizedQualities() {
|
||||||
|
quantizationInfo.quantizeQualityScores(RAC.QUANTIZING_LEVELS);
|
||||||
|
for (Map<BitSet, RecalDatum> table : keysAndTablesMap.values()) {
|
||||||
|
for (RecalDatum datum : table.values()) {
|
||||||
|
datum.calcCombinedEmpiricalQuality(QualityUtils.MAX_QUAL_SCORE);
|
||||||
|
datum.calcEstimatedReportedQuality();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void output(PrintStream output) {
|
||||||
|
RecalDataManager.outputRecalibrationReport(argumentTable, quantizationInfo, keysAndTablesMap, output);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -25,13 +25,9 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.recalibration;
|
package org.broadinstitute.sting.utils.recalibration;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
|
||||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
|
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
|
||||||
import org.broadinstitute.sting.gatk.walkers.recalibration.EmpiricalQual;
|
|
||||||
import org.broadinstitute.sting.utils.BitSetUtils;
|
import org.broadinstitute.sting.utils.BitSetUtils;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
|
|
@ -46,224 +42,27 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class BaseRecalibration {
|
public class BaseRecalibration {
|
||||||
private List<Byte> qualQuantizationMap; // histogram containing the map for qual quantization (calculated after recalibration is done)
|
private QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
|
||||||
private LinkedHashMap<BQSRKeyManager, Map<BitSet, EmpiricalQual>> keysAndTablesMap; // quick access reference to the read group table and its key manager
|
private LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
|
||||||
|
|
||||||
private ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation
|
private ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation
|
||||||
|
|
||||||
private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code";
|
private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code";
|
||||||
private static String TOO_MANY_KEYS_EXCEPTION = "There should only be one key for the RG collapsed table, something went wrong here";
|
private static String TOO_MANY_KEYS_EXCEPTION = "There should only be one key for the RG collapsed table, something went wrong here";
|
||||||
|
|
||||||
/**
|
|
||||||
* Should ALWAYS use the constructor with the GATK Report file
|
|
||||||
*/
|
|
||||||
private BaseRecalibration() {}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor using a GATK Report file
|
* Constructor using a GATK Report file
|
||||||
*
|
*
|
||||||
* @param RECAL_FILE a GATK Report file containing the recalibration information
|
* @param RECAL_FILE a GATK Report file containing the recalibration information
|
||||||
*/
|
*/
|
||||||
public BaseRecalibration(final File RECAL_FILE) {
|
public BaseRecalibration(final File RECAL_FILE) {
|
||||||
GATKReport report = new GATKReport(RECAL_FILE);
|
RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE);
|
||||||
|
|
||||||
GATKReportTable argumentTable = report.getTable(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE);
|
quantizationInfo = recalibrationReport.getQuantizationInfo();
|
||||||
RecalibrationArgumentCollection RAC = initializeArgumentCollectionTable(argumentTable);
|
keysAndTablesMap = recalibrationReport.getKeysAndTablesMap();
|
||||||
|
requestedCovariates = recalibrationReport.getRequestedCovariates();
|
||||||
GATKReportTable quantizedTable = report.getTable(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE);
|
|
||||||
qualQuantizationMap = initializeQuantizationTable(quantizedTable);
|
|
||||||
|
|
||||||
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalDataManager.initializeCovariates(RAC); // initialize the required and optional covariates
|
|
||||||
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
|
|
||||||
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
|
|
||||||
requestedCovariates.addAll(requiredCovariates); // add all required covariates to the list of requested covariates
|
|
||||||
requestedCovariates.addAll(optionalCovariates); // add all optional covariates to the list of requested covariates
|
|
||||||
|
|
||||||
for (Covariate cov : requestedCovariates)
|
|
||||||
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
|
|
||||||
|
|
||||||
keysAndTablesMap = new LinkedHashMap<BQSRKeyManager, Map<BitSet, EmpiricalQual>>();
|
|
||||||
ArrayList<Covariate> requiredCovariatesToAdd = new ArrayList<Covariate>(requiredCovariates.size()); // incrementally add the covariates to create the recal tables with 1, 2 and 3 covariates.
|
|
||||||
ArrayList<Covariate> optionalCovariatesToAdd = new ArrayList<Covariate>(); // initialize an empty array of optional covariates to create the first few tables
|
|
||||||
for (Covariate covariate : requiredCovariates) {
|
|
||||||
requiredCovariatesToAdd.add(covariate);
|
|
||||||
final Map<BitSet, EmpiricalQual> table; // initializing a new recal table for each required covariate (cumulatively)
|
|
||||||
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
|
|
||||||
|
|
||||||
int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES)
|
|
||||||
if (nRequiredCovariates == 1) { // if there is only one required covariate, this is the read group table
|
|
||||||
final GATKReportTable reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE);
|
|
||||||
table = parseReadGroupTable(keyManager, reportTable);
|
|
||||||
}
|
|
||||||
else if (nRequiredCovariates == 2 && optionalCovariatesToAdd.isEmpty()) { // when we have both required covariates and no optional covariates we're at the QUAL table
|
|
||||||
final GATKReportTable reportTable = report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE);
|
|
||||||
table = parseQualityScoreTable(keyManager, reportTable);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
throw new ReviewedStingException(UNRECOGNIZED_REPORT_TABLE_EXCEPTION);
|
|
||||||
|
|
||||||
keysAndTablesMap.put(keyManager, table); // adding the pair key+table to the map
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initializing it's corresponding key manager
|
|
||||||
final GATKReportTable reportTable = report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE);
|
|
||||||
final Map<BitSet, EmpiricalQual> table = parseAllCovariatesTable(keyManager, reportTable);
|
|
||||||
keysAndTablesMap.put(keyManager, table); // adding the pair table+key to the map
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compiles the list of keys for the Covariates table and uses the shared parsing utility to produce the actual table
|
|
||||||
*
|
|
||||||
* @param keyManager the key manager for this table
|
|
||||||
* @param reportTable the GATKReport table containing data for this table
|
|
||||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
|
||||||
*/
|
|
||||||
private Map<BitSet, EmpiricalQual> parseAllCovariatesTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
|
||||||
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(5);
|
|
||||||
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
|
|
||||||
columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
|
|
||||||
columnNamesOrderedList.add(RecalDataManager.COVARIATE_VALUE_COLUMN_NAME);
|
|
||||||
columnNamesOrderedList.add(RecalDataManager.COVARIATE_NAME_COLUMN_NAME);
|
|
||||||
columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME);
|
|
||||||
return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* Compiles the list of keys for the QualityScore table and uses the shared parsing utility to produce the actual table
|
|
||||||
* @param keyManager the key manager for this table
|
|
||||||
* @param reportTable the GATKReport table containing data for this table
|
|
||||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
|
||||||
*/
|
|
||||||
private Map<BitSet, EmpiricalQual> parseQualityScoreTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
|
||||||
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(3);
|
|
||||||
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
|
|
||||||
columnNamesOrderedList.add(RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
|
|
||||||
columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME);
|
|
||||||
return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compiles the list of keys for the ReadGroup table and uses the shared parsing utility to produce the actual table
|
|
||||||
*
|
|
||||||
* @param keyManager the key manager for this table
|
|
||||||
* @param reportTable the GATKReport table containing data for this table
|
|
||||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
|
||||||
*/
|
|
||||||
private Map<BitSet, EmpiricalQual> parseReadGroupTable(BQSRKeyManager keyManager, GATKReportTable reportTable) {
|
|
||||||
ArrayList<String> columnNamesOrderedList = new ArrayList<String>(2);
|
|
||||||
columnNamesOrderedList.add(RecalDataManager.READGROUP_COLUMN_NAME);
|
|
||||||
columnNamesOrderedList.add(RecalDataManager.EVENT_TYPE_COLUMN_NAME);
|
|
||||||
return genericRecalTableParsing(keyManager, reportTable, columnNamesOrderedList);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Shared parsing functionality for all tables.
|
|
||||||
*
|
|
||||||
* @param keyManager the key manager for this table
|
|
||||||
* @param reportTable the GATKReport table containing data for this table
|
|
||||||
* @param columnNamesOrderedList a list of columns to read from the report table and build as key for this particular table
|
|
||||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
|
||||||
*/
|
|
||||||
private Map<BitSet, EmpiricalQual> genericRecalTableParsing(BQSRKeyManager keyManager, GATKReportTable reportTable, ArrayList<String> columnNamesOrderedList) {
|
|
||||||
Map<BitSet, EmpiricalQual> result = new HashMap<BitSet, EmpiricalQual>(reportTable.getNumRows()*2);
|
|
||||||
|
|
||||||
for (Object primaryKey : reportTable.getPrimaryKeys()) {
|
|
||||||
int nKeys = columnNamesOrderedList.size();
|
|
||||||
Object [] keySet = new Object[nKeys];
|
|
||||||
for (int i = 0; i < nKeys; i++)
|
|
||||||
keySet[i] = reportTable.get(primaryKey, columnNamesOrderedList.get(i)); // all these objects are okay in String format, the key manager will handle them correctly (except for the event type (see below)
|
|
||||||
keySet[keySet.length-1] = EventType.eventFrom((String) keySet[keySet.length-1]); // the last key is always the event type. We convert the string ("M", "I" or "D") to an enum object (necessary for the key manager).
|
|
||||||
BitSet bitKey = keyManager.bitSetFromKey(keySet);
|
|
||||||
|
|
||||||
double estimatedQReported = (Double) reportTable.get(primaryKey, RecalDataManager.ESTIMATED_Q_REPORTED_COLUMN_NAME);
|
|
||||||
double empiricalQuality = (Double) reportTable.get(primaryKey, RecalDataManager.EMPIRICAL_QUALITY_COLUMN_NAME);
|
|
||||||
EmpiricalQual empiricalQual = new EmpiricalQual(estimatedQReported, empiricalQuality);
|
|
||||||
|
|
||||||
result.put(bitKey, empiricalQual);
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Parses the quantization table from the GATK Report and turns it into a map of original => quantized quality scores
|
|
||||||
*
|
|
||||||
* @param table the GATKReportTable containing the quantization mappings
|
|
||||||
* @return an ArrayList with the quantization mappings from 0 to MAX_QUAL_SCORE
|
|
||||||
*/
|
|
||||||
private List<Byte> initializeQuantizationTable(GATKReportTable table) {
|
|
||||||
Byte[] result = new Byte[QualityUtils.MAX_QUAL_SCORE + 1];
|
|
||||||
for (Object primaryKey : table.getPrimaryKeys()) {
|
|
||||||
Object value = table.get(primaryKey, RecalDataManager.QUANTIZED_VALUE_COLUMN_NAME);
|
|
||||||
byte originalQual = Byte.parseByte(primaryKey.toString());
|
|
||||||
byte quantizedQual = Byte.parseByte(value.toString());
|
|
||||||
result[originalQual] = quantizedQual;
|
|
||||||
}
|
|
||||||
return Arrays.asList(result);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Parses the arguments table from the GATK Report and creates a RAC object with the proper initialization values
|
|
||||||
*
|
|
||||||
* @param table the GATKReportTable containing the arguments and its corresponding values
|
|
||||||
* @return a RAC object properly initialized with all the objects in the table
|
|
||||||
*/
|
|
||||||
private RecalibrationArgumentCollection initializeArgumentCollectionTable(GATKReportTable table) {
|
|
||||||
RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
|
||||||
|
|
||||||
for (Object primaryKey : table.getPrimaryKeys()) {
|
|
||||||
Object value = table.get(primaryKey, RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
|
|
||||||
if (value.equals("null"))
|
|
||||||
value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport
|
|
||||||
|
|
||||||
if (primaryKey.equals("covariate") && value != null)
|
|
||||||
RAC.COVARIATES = value.toString().split(",");
|
|
||||||
|
|
||||||
else if (primaryKey.equals("standard_covs"))
|
|
||||||
RAC.USE_STANDARD_COVARIATES = Boolean.parseBoolean((String) value);
|
|
||||||
|
|
||||||
else if (primaryKey.equals("solid_recal_mode"))
|
|
||||||
RAC.SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.recalModeFromString((String) value);
|
|
||||||
|
|
||||||
else if (primaryKey.equals("solid_nocall_strategy"))
|
|
||||||
RAC.SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value);
|
|
||||||
|
|
||||||
else if (primaryKey.equals("mismatches_context_size"))
|
|
||||||
RAC.MISMATCHES_CONTEXT_SIZE = Integer.parseInt((String) value);
|
|
||||||
|
|
||||||
else if (primaryKey.equals("insertions_context_size"))
|
|
||||||
RAC.INSERTIONS_CONTEXT_SIZE = Integer.parseInt((String) value);
|
|
||||||
|
|
||||||
else if (primaryKey.equals("deletions_context_size"))
|
|
||||||
RAC.DELETIONS_CONTEXT_SIZE = Integer.parseInt((String) value);
|
|
||||||
|
|
||||||
else if (primaryKey.equals("mismatches_default_quality"))
|
|
||||||
RAC.MISMATCHES_DEFAULT_QUALITY = Byte.parseByte((String) value);
|
|
||||||
|
|
||||||
else if (primaryKey.equals("insertions_default_quality"))
|
|
||||||
RAC.INSERTIONS_DEFAULT_QUALITY = Byte.parseByte((String) value);
|
|
||||||
|
|
||||||
else if (primaryKey.equals("deletions_default_quality"))
|
|
||||||
RAC.DELETIONS_DEFAULT_QUALITY = Byte.parseByte((String) value);
|
|
||||||
|
|
||||||
else if (primaryKey.equals("low_quality_tail"))
|
|
||||||
RAC.LOW_QUAL_TAIL = Byte.parseByte((String) value);
|
|
||||||
|
|
||||||
else if (primaryKey.equals("default_platform"))
|
|
||||||
RAC.DEFAULT_PLATFORM = (String) value;
|
|
||||||
|
|
||||||
else if (primaryKey.equals("force_platform"))
|
|
||||||
RAC.FORCE_PLATFORM = (String) value;
|
|
||||||
|
|
||||||
else if (primaryKey.equals("quantizing_levels"))
|
|
||||||
RAC.QUANTIZING_LEVELS = Integer.parseInt((String) value);
|
|
||||||
}
|
|
||||||
|
|
||||||
return RAC;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Recalibrates the base qualities of a read
|
* Recalibrates the base qualities of a read
|
||||||
*
|
*
|
||||||
|
|
@ -316,9 +115,9 @@ public class BaseRecalibration {
|
||||||
double deltaQReported = 0.0;
|
double deltaQReported = 0.0;
|
||||||
double deltaQCovariates = 0.0;
|
double deltaQCovariates = 0.0;
|
||||||
|
|
||||||
for (Map.Entry<BQSRKeyManager, Map<BitSet, EmpiricalQual>> mapEntry : keysAndTablesMap.entrySet()) {
|
for (Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> mapEntry : keysAndTablesMap.entrySet()) {
|
||||||
BQSRKeyManager keyManager = mapEntry.getKey();
|
BQSRKeyManager keyManager = mapEntry.getKey();
|
||||||
Map<BitSet, EmpiricalQual> table = mapEntry.getValue();
|
Map<BitSet, RecalDatum> table = mapEntry.getValue();
|
||||||
|
|
||||||
switch(keyManager.getRequiredCovariates().size()) {
|
switch(keyManager.getRequiredCovariates().size()) {
|
||||||
case 1: // this is the ReadGroup table
|
case 1: // this is the ReadGroup table
|
||||||
|
|
@ -326,7 +125,7 @@ public class BaseRecalibration {
|
||||||
if (bitKeys.size() > 1)
|
if (bitKeys.size() > 1)
|
||||||
throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION);
|
throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION);
|
||||||
|
|
||||||
final EmpiricalQual empiricalQualRG = table.get(bitKeys.get(0));
|
final RecalDatum empiricalQualRG = table.get(bitKeys.get(0));
|
||||||
if (empiricalQualRG != null) {
|
if (empiricalQualRG != null) {
|
||||||
final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality();
|
final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality();
|
||||||
final double aggregrateQReported = empiricalQualRG.getEstimatedQReported();
|
final double aggregrateQReported = empiricalQualRG.getEstimatedQReported();
|
||||||
|
|
@ -339,7 +138,7 @@ public class BaseRecalibration {
|
||||||
if (bitKeys.size() > 1)
|
if (bitKeys.size() > 1)
|
||||||
throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION);
|
throw new ReviewedStingException(TOO_MANY_KEYS_EXCEPTION);
|
||||||
|
|
||||||
final EmpiricalQual empiricalQualQS = table.get(bitKeys.get(0));
|
final RecalDatum empiricalQualQS = table.get(bitKeys.get(0));
|
||||||
if (empiricalQualQS != null) {
|
if (empiricalQualQS != null) {
|
||||||
final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality();
|
final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality();
|
||||||
deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
|
deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
|
||||||
|
|
@ -348,7 +147,7 @@ public class BaseRecalibration {
|
||||||
else { // this is the table with all the covariates
|
else { // this is the table with all the covariates
|
||||||
bitKeys = keyManager.bitSetsFromAllKeys(key, errorModel); // calculate the shift in quality due to each covariate by itself in turn
|
bitKeys = keyManager.bitSetsFromAllKeys(key, errorModel); // calculate the shift in quality due to each covariate by itself in turn
|
||||||
for (BitSet k : bitKeys) {
|
for (BitSet k : bitKeys) {
|
||||||
final EmpiricalQual empiricalQualCO = table.get(k);
|
final RecalDatum empiricalQualCO = table.get(k);
|
||||||
if (empiricalQualCO != null) {
|
if (empiricalQualCO != null) {
|
||||||
double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
|
double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
|
||||||
deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
|
deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
|
||||||
|
|
@ -364,7 +163,8 @@ public class BaseRecalibration {
|
||||||
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
|
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
|
||||||
recalibratedQual = QualityUtils.boundQual((int) Math.round(recalibratedQual), QualityUtils.MAX_QUAL_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
|
recalibratedQual = QualityUtils.boundQual((int) Math.round(recalibratedQual), QualityUtils.MAX_QUAL_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
|
||||||
|
|
||||||
return qualQuantizationMap.get((int) recalibratedQual); // return the quantized version of the recalibrated quality
|
|
||||||
|
return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,476 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.recalibration;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
|
import com.google.java.contract.Invariant;
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||||
|
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||||
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.io.PrintStream;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A general algorithm for quantizing quality score distributions to use a specific number of levels
|
||||||
|
*
|
||||||
|
* Takes a histogram of quality scores and a desired number of levels and produces a
|
||||||
|
* map from original quality scores -> quantized quality scores.
|
||||||
|
*
|
||||||
|
* Note that this data structure is fairly heavy-weight, holding lots of debugging and
|
||||||
|
* calculation information. If you want to use it efficiently at scale with lots of
|
||||||
|
* read groups the right way to do this:
|
||||||
|
*
|
||||||
|
* Map<ReadGroup, List<Byte>> map
|
||||||
|
* for each read group rg:
|
||||||
|
* hist = getQualHist(rg)
|
||||||
|
* QualQuantizer qq = new QualQuantizer(hist, nLevels, minInterestingQual)
|
||||||
|
* map.set(rg, qq.getOriginalToQuantizedMap())
|
||||||
|
*
|
||||||
|
* This map would then be used to look up the appropriate original -> quantized
|
||||||
|
* quals for each read as it comes in.
|
||||||
|
*
|
||||||
|
* @author Mark Depristo
|
||||||
|
* @since 3/2/12
|
||||||
|
*/
|
||||||
|
public class QualQuantizer {
|
||||||
|
final private static Set<QualInterval> MY_EMPTY_SET = Collections.emptySet();
|
||||||
|
|
||||||
|
private static Logger logger = Logger.getLogger(QualQuantizer.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Inputs to the QualQuantizer
|
||||||
|
*/
|
||||||
|
final int nLevels, minInterestingQual;
|
||||||
|
final List<Long> nObservationsPerQual;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Map from original qual (e.g., Q30) to new quantized qual (e.g., Q28).
|
||||||
|
*
|
||||||
|
* Has the same range as nObservationsPerQual
|
||||||
|
*/
|
||||||
|
final List<Byte> originalToQuantizedMap;
|
||||||
|
|
||||||
|
/** Sorted set of qual intervals.
|
||||||
|
*
|
||||||
|
* After quantize() this data structure contains only the top-level qual intervals
|
||||||
|
*/
|
||||||
|
final TreeSet<QualInterval> quantizedIntervals;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Protected creator for testng use only
|
||||||
|
*/
|
||||||
|
protected QualQuantizer(final int minInterestingQual) {
|
||||||
|
this.nObservationsPerQual = Collections.emptyList();
|
||||||
|
this.nLevels = 0;
|
||||||
|
this.minInterestingQual = minInterestingQual;
|
||||||
|
this.quantizedIntervals = null;
|
||||||
|
this.originalToQuantizedMap = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a QualQuantizer for the histogram that has nLevels
|
||||||
|
*
|
||||||
|
* Note this is the only interface to the system. After creating this object
|
||||||
|
* the map can be obtained via getOriginalToQuantizedMap()
|
||||||
|
*
|
||||||
|
* @param nObservationsPerQual A histogram of counts of bases with quality scores. Note that
|
||||||
|
* this histogram must start at 0 (i.e., get(0) => count of Q0 bases) and must include counts all the
|
||||||
|
* way up to the largest quality score possible in the reads. OK if the histogram includes many 0
|
||||||
|
* count bins, as these are quantized for free.
|
||||||
|
* @param nLevels the desired number of distinct quality scores to represent the full original range. Must
|
||||||
|
* be at least 1.
|
||||||
|
* @param minInterestingQual All quality scores <= this value are considered uninteresting and are freely
|
||||||
|
* merged together. For example, if this value is 10, then Q0-Q10 are all considered free to merge, and
|
||||||
|
* quantized into a single value. For ILMN data with lots of Q2 bases this results in a Q2 bin containing
|
||||||
|
* all data with Q0-Q10.
|
||||||
|
*/
|
||||||
|
public QualQuantizer(final List<Long> nObservationsPerQual, final int nLevels, final int minInterestingQual) {
|
||||||
|
this.nObservationsPerQual = nObservationsPerQual;
|
||||||
|
this.nLevels = nLevels;
|
||||||
|
this.minInterestingQual = minInterestingQual;
|
||||||
|
|
||||||
|
// some sanity checking
|
||||||
|
if ( Collections.min(nObservationsPerQual) < 0 ) throw new ReviewedStingException("Quality score histogram has negative values at: " + Utils.join(", ", nObservationsPerQual));
|
||||||
|
if ( nLevels < 0 ) throw new ReviewedStingException("nLevels must be >= 0");
|
||||||
|
if ( minInterestingQual < 0 ) throw new ReviewedStingException("minInterestingQual must be >= 0");
|
||||||
|
|
||||||
|
// actually run the quantizer
|
||||||
|
this.quantizedIntervals = quantize();
|
||||||
|
|
||||||
|
// store the map
|
||||||
|
this.originalToQuantizedMap = intervalsToMap(quantizedIntervals);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents an contiguous interval of quality scores.
|
||||||
|
*
|
||||||
|
* qStart and qEnd are inclusive, so qStart = qEnd = 2 is the quality score bin of 2
|
||||||
|
*/
|
||||||
|
@Invariant({
|
||||||
|
"qStart <= qEnd",
|
||||||
|
"qStart >= 0",
|
||||||
|
"qEnd <= 1000",
|
||||||
|
"nObservations >= 0",
|
||||||
|
"nErrors >= 0",
|
||||||
|
"nErrors <= nObservations",
|
||||||
|
"fixedQual >= -1 && fixedQual <= QualityUtils.MAX_QUAL_SCORE",
|
||||||
|
"mergeOrder >= 0"})
|
||||||
|
protected final class QualInterval implements Comparable<QualInterval> {
|
||||||
|
final int qStart, qEnd, fixedQual, level;
|
||||||
|
final long nObservations, nErrors;
|
||||||
|
final Set<QualInterval> subIntervals;
|
||||||
|
|
||||||
|
/** for debugging / visualization. When was this interval created? */
|
||||||
|
int mergeOrder;
|
||||||
|
|
||||||
|
protected QualInterval(final int qStart, final int qEnd, final long nObservations, final long nErrors, final int level) {
|
||||||
|
this(qStart, qEnd, nObservations, nErrors, level, -1, MY_EMPTY_SET);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected QualInterval(final int qStart, final int qEnd, final long nObservations, final long nErrors, final int level, final Set<QualInterval> subIntervals) {
|
||||||
|
this(qStart, qEnd, nObservations, nErrors, level, -1, subIntervals);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected QualInterval(final int qStart, final int qEnd, final long nObservations, final long nErrors, final int level, final int fixedQual) {
|
||||||
|
this(qStart, qEnd, nObservations, nErrors, level, fixedQual, MY_EMPTY_SET);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Requires("level >= 0")
|
||||||
|
public QualInterval(final int qStart, final int qEnd, final long nObservations, final long nErrors, final int level, final int fixedQual, final Set<QualInterval> subIntervals) {
|
||||||
|
this.qStart = qStart;
|
||||||
|
this.qEnd = qEnd;
|
||||||
|
this.nObservations = nObservations;
|
||||||
|
this.nErrors = nErrors;
|
||||||
|
this.fixedQual = fixedQual;
|
||||||
|
this.level = level;
|
||||||
|
this.mergeOrder = 0;
|
||||||
|
this.subIntervals = Collections.unmodifiableSet(subIntervals);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Human readable name of this interval: e.g., 10-12
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public String getName() {
|
||||||
|
return qStart + "-" + qEnd;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "QQ:" + getName();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the error rate (in real space) of this interval, or 0 if there are no obserations
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
@Ensures("result >= 0.0")
|
||||||
|
public double getErrorRate() {
|
||||||
|
if ( hasFixedQual() )
|
||||||
|
return QualityUtils.qualToErrorProb((byte)fixedQual);
|
||||||
|
else if ( nObservations == 0 )
|
||||||
|
return 0.0;
|
||||||
|
else
|
||||||
|
return (nErrors+1) / (1.0 * (nObservations+1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the QUAL of the error rate of this interval, or the fixed
|
||||||
|
* qual if this interval was created with a fixed qual.
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
@Ensures("result >= 0 && result <= QualityUtils.MAX_QUAL_SCORE")
|
||||||
|
public byte getQual() {
|
||||||
|
if ( ! hasFixedQual() )
|
||||||
|
return QualityUtils.probToQual(1-getErrorRate(), 0);
|
||||||
|
else
|
||||||
|
return (byte)fixedQual;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return true if this bin is using a fixed qual
|
||||||
|
*/
|
||||||
|
public boolean hasFixedQual() {
|
||||||
|
return fixedQual != -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compareTo(final QualInterval qualInterval) {
|
||||||
|
return new Integer(this.qStart).compareTo(qualInterval.qStart);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a interval representing the merge of this interval and toMerge
|
||||||
|
*
|
||||||
|
* Errors and observations are combined
|
||||||
|
* Subintervals updated in order of left to right (determined by qStart)
|
||||||
|
* Level is 1 + highest level of this and toMerge
|
||||||
|
* Order must be updated elsewhere
|
||||||
|
*
|
||||||
|
* @param toMerge
|
||||||
|
* @return newly created merged QualInterval
|
||||||
|
*/
|
||||||
|
@Requires({"toMerge != null"})
|
||||||
|
@Ensures({
|
||||||
|
"result != null",
|
||||||
|
"result.nObservations >= this.nObservations",
|
||||||
|
"result.nObservations >= toMerge.nObservations",
|
||||||
|
"result.nErrors >= this.nErrors",
|
||||||
|
"result.nErrors >= toMerge.nErrors",
|
||||||
|
"result.qStart == Math.min(this.qStart, toMerge.qStart)",
|
||||||
|
"result.qEnd == Math.max(this.qEnd, toMerge.qEnd)",
|
||||||
|
"result.level > Math.max(this.level, toMerge.level)",
|
||||||
|
"result.subIntervals.size() == 2"
|
||||||
|
})
|
||||||
|
public QualInterval merge(final QualInterval toMerge) {
|
||||||
|
final QualInterval left = this.compareTo(toMerge) < 0 ? this : toMerge;
|
||||||
|
final QualInterval right = this.compareTo(toMerge) < 0 ? toMerge : this;
|
||||||
|
|
||||||
|
if ( left.qEnd + 1 != right.qStart )
|
||||||
|
throw new ReviewedStingException("Attempting to merge non-continguous intervals: left = " + left + " right = " + right);
|
||||||
|
|
||||||
|
final long nCombinedObs = left.nObservations + right.nObservations;
|
||||||
|
final long nCombinedErr = left.nErrors + right.nErrors;
|
||||||
|
|
||||||
|
final int level = Math.max(left.level, right.level) + 1;
|
||||||
|
final Set<QualInterval> subIntervals = new HashSet<QualInterval>(Arrays.asList(left, right));
|
||||||
|
QualInterval merged = new QualInterval(left.qStart, right.qEnd, nCombinedObs, nCombinedErr, level, subIntervals);
|
||||||
|
|
||||||
|
return merged;
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getPenalty() {
|
||||||
|
return calcPenalty(getErrorRate());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate the penalty of this interval, given the overall error rate for the interval
|
||||||
|
*
|
||||||
|
* If the globalErrorRate is e, this value is:
|
||||||
|
*
|
||||||
|
* sum_i |log10(e_i) - log10(e)| * nObservations_i
|
||||||
|
*
|
||||||
|
* each the index i applies to all leaves of the tree accessible from this interval
|
||||||
|
* (found recursively from subIntervals as necessary)
|
||||||
|
*
|
||||||
|
* @param globalErrorRate overall error rate in real space against which we calculate the penalty
|
||||||
|
* @return the cost of approximating the bins in this interval with the globalErrorRate
|
||||||
|
*/
|
||||||
|
@Requires("globalErrorRate >= 0.0")
|
||||||
|
@Ensures("result >= 0.0")
|
||||||
|
private double calcPenalty(final double globalErrorRate) {
|
||||||
|
if ( globalErrorRate == 0.0 ) // there were no observations, so there's no penalty
|
||||||
|
return 0.0;
|
||||||
|
|
||||||
|
if ( subIntervals.isEmpty() ) {
|
||||||
|
// this is leave node
|
||||||
|
if ( this.qEnd <= minInterestingQual )
|
||||||
|
// It's free to merge up quality scores below the smallest interesting one
|
||||||
|
return 0;
|
||||||
|
else {
|
||||||
|
return (Math.abs(Math.log10(getErrorRate()) - Math.log10(globalErrorRate))) * nObservations;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
double sum = 0;
|
||||||
|
for ( final QualInterval interval : subIntervals )
|
||||||
|
sum += interval.calcPenalty(globalErrorRate);
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Main method for computing the quantization intervals.
|
||||||
|
*
|
||||||
|
* Invoked in the constructor after all input variables are initialized. Walks
|
||||||
|
* over the inputs and builds the min. penalty forest of intervals with exactly nLevel
|
||||||
|
* root nodes. Finds this min. penalty forest via greedy search, so is not guarenteed
|
||||||
|
* to find the optimal combination.
|
||||||
|
*
|
||||||
|
* TODO: develop a smarter algorithm
|
||||||
|
*
|
||||||
|
* @return the forest of intervals with size == nLevels
|
||||||
|
*/
|
||||||
|
@Ensures({"! result.isEmpty()", "result.size() == nLevels"})
|
||||||
|
private TreeSet<QualInterval> quantize() {
|
||||||
|
// create intervals for each qual individually
|
||||||
|
final TreeSet<QualInterval> intervals = new TreeSet<QualInterval>();
|
||||||
|
for ( int qStart = 0; qStart < getNQualsInHistogram(); qStart++ ) {
|
||||||
|
final long nObs = nObservationsPerQual.get(qStart);
|
||||||
|
final double errorRate = QualityUtils.qualToErrorProb((byte)qStart);
|
||||||
|
final double nErrors = nObs * errorRate;
|
||||||
|
final QualInterval qi = new QualInterval(qStart, qStart, nObs, (int)Math.floor(nErrors), 0, (byte)qStart);
|
||||||
|
intervals.add(qi);
|
||||||
|
}
|
||||||
|
|
||||||
|
// greedy algorithm:
|
||||||
|
// while ( n intervals >= nLevels ):
|
||||||
|
// find intervals to merge with least penalty
|
||||||
|
// merge it
|
||||||
|
while ( intervals.size() > nLevels ) {
|
||||||
|
mergeLowestPenaltyIntervals(intervals);
|
||||||
|
}
|
||||||
|
|
||||||
|
return intervals;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper function that finds and mergest together the lowest penalty pair
|
||||||
|
* of intervals
|
||||||
|
* @param intervals
|
||||||
|
*/
|
||||||
|
@Requires("! intervals.isEmpty()")
|
||||||
|
private void mergeLowestPenaltyIntervals(final TreeSet<QualInterval> intervals) {
|
||||||
|
// setup the iterators
|
||||||
|
final Iterator<QualInterval> it1 = intervals.iterator();
|
||||||
|
final Iterator<QualInterval> it1p = intervals.iterator();
|
||||||
|
it1p.next(); // skip one
|
||||||
|
|
||||||
|
// walk over the pairs of left and right, keeping track of the pair with the lowest merge penalty
|
||||||
|
QualInterval minMerge = null;
|
||||||
|
if ( logger.isDebugEnabled() ) logger.debug("mergeLowestPenaltyIntervals: " + intervals.size());
|
||||||
|
int lastMergeOrder = 0;
|
||||||
|
while ( it1p.hasNext() ) {
|
||||||
|
final QualInterval left = it1.next();
|
||||||
|
final QualInterval right = it1p.next();
|
||||||
|
final QualInterval merged = left.merge(right);
|
||||||
|
lastMergeOrder = Math.max(Math.max(lastMergeOrder, left.mergeOrder), right.mergeOrder);
|
||||||
|
if ( minMerge == null || (merged.getPenalty() < minMerge.getPenalty() ) ) {
|
||||||
|
if ( logger.isDebugEnabled() ) logger.debug(" Updating merge " + minMerge);
|
||||||
|
minMerge = merged;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// now actually go ahead and merge the minMerge pair
|
||||||
|
if ( logger.isDebugEnabled() ) logger.debug(" => final min merge " + minMerge);
|
||||||
|
intervals.removeAll(minMerge.subIntervals);
|
||||||
|
intervals.add(minMerge);
|
||||||
|
minMerge.mergeOrder = lastMergeOrder + 1;
|
||||||
|
if ( logger.isDebugEnabled() ) logger.debug("updated intervals: " + intervals);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a final forest of intervals constructs a list mapping
|
||||||
|
* list.get(i) => quantized qual to use for original quality score i
|
||||||
|
*
|
||||||
|
* This function should be called only once to initialize the corresponding
|
||||||
|
* cached value in this object, as the calculation is a bit costly.
|
||||||
|
*
|
||||||
|
* @param intervals
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
@Ensures("result.size() == getNQualsInHistogram()")
|
||||||
|
private List<Byte> intervalsToMap(final TreeSet<QualInterval> intervals) {
|
||||||
|
final List<Byte> map = new ArrayList<Byte>(getNQualsInHistogram());
|
||||||
|
map.addAll(Collections.nCopies(getNQualsInHistogram(), Byte.MIN_VALUE));
|
||||||
|
for ( final QualInterval interval : intervals ) {
|
||||||
|
for ( int q = interval.qStart; q <= interval.qEnd; q++ ) {
|
||||||
|
map.set(q, interval.getQual());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( Collections.min(map) == Byte.MIN_VALUE )
|
||||||
|
throw new ReviewedStingException("quantized quality score map contains an un-initialized value");
|
||||||
|
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Ensures("result > 0")
|
||||||
|
private final int getNQualsInHistogram() {
|
||||||
|
return nObservationsPerQual.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write out a GATKReport to visualize the QualQuantization process of this data
|
||||||
|
* @param out
|
||||||
|
*/
|
||||||
|
public void writeReport(PrintStream out) {
|
||||||
|
final GATKReport report = new GATKReport();
|
||||||
|
|
||||||
|
addQualHistogramToReport(report);
|
||||||
|
addIntervalsToReport(report);
|
||||||
|
|
||||||
|
report.print(out);
|
||||||
|
}
|
||||||
|
|
||||||
|
private final void addQualHistogramToReport(final GATKReport report) {
|
||||||
|
report.addTable("QualHistogram", "Quality score histogram provided to report");
|
||||||
|
GATKReportTable table = report.getTable("QualHistogram");
|
||||||
|
|
||||||
|
table.addPrimaryKey("qual");
|
||||||
|
table.addColumn("count", "NA");
|
||||||
|
|
||||||
|
for ( int q = 0; q < nObservationsPerQual.size(); q++ ) {
|
||||||
|
table.set(q, "count", nObservationsPerQual.get(q));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private final void addIntervalsToReport(final GATKReport report) {
|
||||||
|
report.addTable("QualQuantizerIntervals", "Table of QualQuantizer quantization intervals");
|
||||||
|
GATKReportTable table = report.getTable("QualQuantizerIntervals");
|
||||||
|
|
||||||
|
table.addPrimaryKey("name");
|
||||||
|
table.addColumn("qStart", "NA");
|
||||||
|
table.addColumn("qEnd", "NA");
|
||||||
|
table.addColumn("level", "NA");
|
||||||
|
table.addColumn("merge.order", "NA");
|
||||||
|
table.addColumn("nErrors", "NA");
|
||||||
|
table.addColumn("nObservations", "NA");
|
||||||
|
table.addColumn("qual", "NA");
|
||||||
|
table.addColumn("penalty", "NA");
|
||||||
|
table.addColumn("root.node", "NA");
|
||||||
|
//table.addColumn("subintervals", "NA");
|
||||||
|
|
||||||
|
for ( QualInterval interval : quantizedIntervals)
|
||||||
|
addIntervalToReport(table, interval, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private final void addIntervalToReport(final GATKReportTable table, QualInterval interval, final boolean atRootP) {
|
||||||
|
final String name = interval.getName();
|
||||||
|
table.set(name, "qStart", interval.qStart);
|
||||||
|
table.set(name, "qEnd", interval.qEnd);
|
||||||
|
table.set(name, "level", interval.level);
|
||||||
|
table.set(name, "merge.order", interval.mergeOrder);
|
||||||
|
table.set(name, "nErrors", interval.nErrors);
|
||||||
|
table.set(name, "nObservations", interval.nObservations);
|
||||||
|
table.set(name, "qual", interval.getQual());
|
||||||
|
table.set(name, "penalty", String.format("%.1f", interval.getPenalty()));
|
||||||
|
table.set(name, "root.node", atRootP);
|
||||||
|
|
||||||
|
for ( final QualInterval sub : interval.subIntervals )
|
||||||
|
addIntervalToReport(table, sub, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Byte> getOriginalToQuantizedMap() {
|
||||||
|
return originalToQuantizedMap;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,5 +1,8 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||||
|
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||||
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -13,17 +16,69 @@ import java.util.List;
|
||||||
public class BQSRGathererUnitTest {
|
public class BQSRGathererUnitTest {
|
||||||
RecalibrationArgumentCollection RAC;
|
RecalibrationArgumentCollection RAC;
|
||||||
|
|
||||||
private static File recal1 = new File("public/testdata/exampleCSV.csv");
|
private static File recal = new File("public/testdata/exampleGRP.grp");
|
||||||
private static File recal2 = new File("public/testdata/exampleCSV.2.csv");
|
|
||||||
|
|
||||||
|
//todo -- this test doesnt work because the primary keys in different tables are not the same. Need to either implement "sort" for testing purposes on GATKReport or have a sophisticated comparison measure
|
||||||
@Test(enabled = false)
|
@Test(enabled = false)
|
||||||
public void testCombineTwoFiles() {
|
public void testCombineSimilarFiles() {
|
||||||
BQSRGatherer gatherer = new BQSRGatherer();
|
BQSRGatherer gatherer = new BQSRGatherer();
|
||||||
List<File> recalFiles = new LinkedList<File> ();
|
List<File> recalFiles = new LinkedList<File> ();
|
||||||
File output = new File("foo.csv");
|
File output = new File("foo.grp");
|
||||||
|
recalFiles.add(recal);
|
||||||
recalFiles.add(recal1);
|
recalFiles.add(recal);
|
||||||
recalFiles.add(recal2);
|
|
||||||
gatherer.gather(recalFiles, output);
|
gatherer.gather(recalFiles, output);
|
||||||
|
|
||||||
|
GATKReport originalReport = new GATKReport(recal);
|
||||||
|
GATKReport calculatedReport = new GATKReport(output);
|
||||||
|
for (GATKReportTable originalTable : originalReport.getTables()) {
|
||||||
|
GATKReportTable calculatedTable = calculatedReport.getTable(originalTable.getTableName());
|
||||||
|
List<String> columnsToTest = new LinkedList<String>();
|
||||||
|
if (originalTable.getTableName().equals(RecalDataManager.ARGUMENT_REPORT_TABLE_TITLE)) { // these tables must be IDENTICAL
|
||||||
|
columnsToTest.add(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME);
|
||||||
|
testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (originalTable.getTableName().equals(RecalDataManager.QUANTIZED_REPORT_TABLE_TITLE)) {
|
||||||
|
columnsToTest.add(RecalDataManager.QUANTIZED_COUNT_COLUMN_NAME);
|
||||||
|
testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (originalTable.getTableName().startsWith("RecalTable")) {
|
||||||
|
columnsToTest.add(RecalDataManager.NUMBER_OBSERVATIONS_COLUMN_NAME);
|
||||||
|
columnsToTest.add(RecalDataManager.NUMBER_ERRORS_COLUMN_NAME);
|
||||||
|
testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Common testing functionality given the columns to test and the multiplication factor to the expected result
|
||||||
|
*
|
||||||
|
* @param original the original table
|
||||||
|
* @param calculated the calculated table
|
||||||
|
* @param columnsToTest list of columns to test. All columns will be tested with the same criteria (equality given factor)
|
||||||
|
* @param factor 1 to test for equality, any other value to multiply the original value and match with the calculated
|
||||||
|
*/
|
||||||
|
private void testTablesWithColumnsAndFactor(GATKReportTable original, GATKReportTable calculated, List<String> columnsToTest, int factor) {
|
||||||
|
for (Object primaryKey : original.getPrimaryKeys()) { // tables don't necessarily have the same primary keys
|
||||||
|
for (String column : columnsToTest) {
|
||||||
|
Object actual = calculated.get(primaryKey, column);
|
||||||
|
Object expected = original.get(primaryKey, column);
|
||||||
|
|
||||||
|
if (factor != 1) {
|
||||||
|
if (expected instanceof Double)
|
||||||
|
expected = (Double) expected * factor;
|
||||||
|
else if (expected instanceof Long)
|
||||||
|
expected = (Long) expected * factor;
|
||||||
|
else if (expected instanceof Integer)
|
||||||
|
expected = (Integer) expected * factor;
|
||||||
|
else if (expected instanceof Byte) {
|
||||||
|
expected = (Byte) expected * factor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Assert.assertEquals(actual, expected, "Primary key: " + primaryKey + " Original Table: " + original.getTableName() + " Calc Table: " + calculated.getTableName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,132 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.BeforeClass;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.BitSet;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Mauricio Carneiro
|
||||||
|
* @since 3/7/12
|
||||||
|
*/
|
||||||
|
public class BQSRKeyManagerUnitTest {
|
||||||
|
RecalibrationArgumentCollection RAC;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public void init() {
|
||||||
|
RAC = new RecalibrationArgumentCollection();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true)
|
||||||
|
public void testCombineBitSets() {
|
||||||
|
final int nRequired = 2;
|
||||||
|
final ArrayList<Covariate> covariates = new ArrayList<Covariate>();
|
||||||
|
covariates.add(new ReadGroupCovariate());
|
||||||
|
covariates.add(new QualityScoreCovariate());
|
||||||
|
covariates.add(new CycleCovariate());
|
||||||
|
covariates.add(new ContextCovariate());
|
||||||
|
createReadAndTest(covariates, nRequired);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true)
|
||||||
|
public void testOnlyRequiredCovariates() {
|
||||||
|
final int nRequired = 2;
|
||||||
|
final ArrayList<Covariate> covariates = new ArrayList<Covariate>(2);
|
||||||
|
covariates.add(new ReadGroupCovariate());
|
||||||
|
covariates.add(new QualityScoreCovariate());
|
||||||
|
createReadAndTest(covariates, nRequired);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true)
|
||||||
|
public void testOnlyOneCovariate() {
|
||||||
|
final int nRequired = 1;
|
||||||
|
final ArrayList<Covariate> covariates = new ArrayList<Covariate>(2);
|
||||||
|
covariates.add(new ReadGroupCovariate());
|
||||||
|
createReadAndTest(covariates, nRequired);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void createReadAndTest(List<Covariate> covariates, int nRequired) {
|
||||||
|
int readLength = 1000;
|
||||||
|
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(ReadUtils.createRandomReadBases(readLength, true), ReadUtils.createRandomReadQuals(readLength), readLength + "M");
|
||||||
|
read.setReadGroup(new GATKSAMReadGroupRecord("ID"));
|
||||||
|
read.getReadGroup().setPlatform("illumina");
|
||||||
|
|
||||||
|
runTestOnRead(read, covariates, nRequired);
|
||||||
|
read.setReadNegativeStrandFlag(true);
|
||||||
|
runTestOnRead(read, covariates, nRequired);
|
||||||
|
read.setReadPairedFlag(true);
|
||||||
|
read.setSecondOfPairFlag(true);
|
||||||
|
runTestOnRead(read, covariates, nRequired);
|
||||||
|
read.setReadNegativeStrandFlag(false);
|
||||||
|
runTestOnRead(read, covariates, nRequired);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Core round-trip check for the BQSR key manager: computes each covariate's
 * per-base keys for the given read, packs them through the key manager into
 * hash BitSets, unpacks them again, and asserts the unpacked key set matches
 * the values produced directly by the covariates.
 *
 * @param read          the read to compute covariate values for
 * @param covariateList all covariates under test, required ones first
 * @param nRequired     how many entries at the head of {@code covariateList} are required
 */
private void runTestOnRead(GATKSAMRecord read, List<Covariate> covariateList, int nRequired) {
    // covariateKeys[covariate][event type][read offset] -> that covariate's key for that base/event
    final BitSet[][][] covariateKeys = new BitSet[covariateList.size()][EventType.values().length][];
    int i = 0;
    for (Covariate cov : covariateList) {
        cov.initialize(RAC);
        CovariateValues covValues = cov.getValues(read);
        covariateKeys[i][EventType.BASE_SUBSTITUTION.index] = covValues.getMismatches();
        covariateKeys[i][EventType.BASE_INSERTION.index] = covValues.getInsertions();
        covariateKeys[i][EventType.BASE_DELETION.index] = covValues.getDeletions();
        i++;
    }
    // Split the list into required/optional by position, mirroring how the
    // BQSRKeyManager is expected to be constructed by its callers.
    List<Covariate> requiredCovariates = new LinkedList<Covariate>();
    List<Covariate> optionalCovariates = new LinkedList<Covariate>();

    for (int j=0; j<nRequired; j++)
        requiredCovariates.add(covariateList.get(j));
    for (int j=nRequired; j<covariateList.size(); j++)
        optionalCovariates.add(covariateList.get(j));

    BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);

    // Check every read offset x event type combination independently.
    for (int l = 0; l < read.getReadLength(); l++) {
        for (int eventType = 0; eventType < EventType.values().length; eventType++) {
            BitSet[] keySet = new BitSet[covariateList.size()];
            Object[] expectedRequired = new Object[covariateList.size()];
            Object[] expectedCovariate = new Object[covariateList.size()];

            for (int j = 0; j < covariateList.size(); j++) {
                keySet[j] = covariateKeys[j][eventType][l];

                // Decode each key back to its original value so we can compare
                // against what the key manager unpacks later.
                if (j < nRequired)
                    expectedRequired[j] = covariateList.get(j).keyFromBitSet(keySet[j]);
                else
                    expectedCovariate[j - nRequired] = covariateList.get(j).keyFromBitSet(keySet[j]);
            }

            // The key manager emits one hash key per optional covariate
            // (or a single key when there are none).
            List<BitSet> hashKeys = keyManager.bitSetsFromAllKeys(keySet, EventType.eventFrom(eventType));
            short cov = 0;    // index of the optional covariate the current hash key corresponds to
            for (BitSet key : hashKeys) {
                Object[] actual = keyManager.keySetFrom(key).toArray();

                // Build the expected array: required values, then (if any
                // optionals) the optional value and its covariate name, then
                // the event type last. Layout must match keySetFrom's output.
                Object[] expected = new Object[nRequired + (optionalCovariates.size() > 0 ? 3 : 1)];
                System.arraycopy(expectedRequired, 0, expected, 0, nRequired);
                if (optionalCovariates.size() > 0) {
                    expected[expected.length-3] = expectedCovariate[cov];
                    // Covariate name as reported by the key manager, e.g. "Context" for ContextCovariate.
                    // NOTE: cov advances once per hash key; assumes hashKeys is ordered like optionalCovariates.
                    expected[expected.length-2] = optionalCovariates.get(cov++).getClass().getSimpleName().split("Covariate")[0];
                }
                expected[expected.length-1] = EventType.eventFrom(eventType);

                for (int k = 0; k < expected.length; k++)
                    Assert.assertEquals(actual[k], expected[k]);
            }
        }
    }
}
|
||||||
|
}
|
||||||
|
|
@ -33,19 +33,15 @@ import net.sf.picard.reference.ReferenceSequenceFile;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.sting.utils.recalibration.QualQuantizer;
|
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.BeforeClass;
|
import org.testng.annotations.BeforeClass;
|
||||||
import org.testng.annotations.BeforeSuite;
|
|
||||||
import org.testng.annotations.DataProvider;
|
import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.lang.reflect.Array;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue