BQSR triage #4
* fixed queue script plot file names * updated the ReadGroupCovariate to use the platform unit instead of sample + lane. * fixed plotting of marginalized reported qualities
This commit is contained in:
parent
d6277b70d8
commit
e440d0ce69
|
|
@ -1,52 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Short one line description of the walker.
|
||||
*
|
||||
* <p> [Long description of the walker] </p>
|
||||
*
|
||||
*
|
||||
* <h2>Input</h2> <p> [Description of the Input] </p>
|
||||
*
|
||||
* <h2>Output</h2> <p> [Description of the Output] </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T [walker name]
|
||||
* </pre>
|
||||
*
|
||||
* @author Mauricio Carneiro
|
||||
* @since 4/17/12
|
||||
*/
|
||||
public class AccuracyDatum extends RecalDatum {
|
||||
private final List<Double> accuracy = new LinkedList<Double>();
|
||||
private final List<Byte> reportedQualities = new LinkedList<Byte>();
|
||||
|
||||
public AccuracyDatum(final RecalDatum recalDatum, final byte originalQuality) {
|
||||
super(recalDatum);
|
||||
accuracy.add(calculateAccuracy(recalDatum, originalQuality));
|
||||
reportedQualities.add(originalQuality);
|
||||
}
|
||||
|
||||
public void combine(final RecalDatum recalDatum, final byte originalQuality) {
|
||||
this.combine(recalDatum);
|
||||
accuracy.add(calculateAccuracy(recalDatum, originalQuality));
|
||||
reportedQualities.add(originalQuality);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%s,%.2f,%.2f", super.toString(), MathUtils.average(reportedQualities), MathUtils.average(accuracy));
|
||||
}
|
||||
|
||||
private static double calculateAccuracy(final RecalDatum recalDatum, final byte originalQuality) {
|
||||
return recalDatum.getEmpiricalQuality() - originalQuality;
|
||||
}
|
||||
}
|
||||
|
|
@ -65,15 +65,19 @@ public class BQSRGatherer extends Gatherer {
|
|||
if (generalReport == null)
|
||||
throw new ReviewedStingException(EMPTY_INPUT_LIST);
|
||||
|
||||
RecalibrationArgumentCollection RAC = generalReport.getRAC();
|
||||
if (RAC.recalibrationReport != null) {
|
||||
RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport);
|
||||
RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, originalReport.getKeysAndTablesMap(), generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES);
|
||||
}
|
||||
else
|
||||
RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES);
|
||||
|
||||
generalReport.calculateEmpiricalAndQuantizedQualities();
|
||||
|
||||
RecalibrationArgumentCollection RAC = generalReport.getRAC();
|
||||
if (RAC.recalibrationReport != null && !RAC.NO_PLOTS) {
|
||||
File recal_out = new File(output.getName() + ".original");
|
||||
RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport);
|
||||
RecalDataManager.generateRecalibrationPlot(recal_out, originalReport.getKeysAndTablesMap(), generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES);
|
||||
}
|
||||
else if (!RAC.NO_PLOTS) {
|
||||
File recal_out = new File(output.getName() + ".recal");
|
||||
RecalDataManager.generateRecalibrationPlot(recal_out, generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES);
|
||||
}
|
||||
|
||||
generalReport.output(outputFile);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -184,7 +184,7 @@ public class BQSRKeyManager {
|
|||
* @return an object array with the values for each key
|
||||
*/
|
||||
public List<Object> keySetFrom(BitSet key) {
|
||||
List<Object> objectKeys = new LinkedList<Object>();
|
||||
List<Object> objectKeys = new ArrayList<Object>();
|
||||
for (RequiredCovariateInfo info : requiredCovariates) {
|
||||
BitSet covariateBitSet = extractBitSetFromKey(key, info.mask, info.bitsBefore); // get the covariate's bitset
|
||||
objectKeys.add(info.covariate.keyFromBitSet(covariateBitSet)); // convert the bitset to object using covariate's interface
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
import java.util.Arrays;
|
||||
import java.util.BitSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -47,10 +46,6 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
|||
private final HashMap<String, Short> readGroupLookupTable = new HashMap<String, Short>();
|
||||
private final HashMap<Short, String> readGroupReverseLookupTable = new HashMap<Short, String>();
|
||||
private short nextId = 0;
|
||||
|
||||
private static final String LANE_TAG = "LN";
|
||||
private static final String SAMPLE_TAG = "SM";
|
||||
|
||||
|
||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
@Override
|
||||
|
|
@ -105,31 +100,14 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
|||
}
|
||||
|
||||
/**
|
||||
* Gather the sample and lane information from the read group record and return sample.lane
|
||||
*
|
||||
* If the bam file is missing the lane information, it tries to use the id regex standardized
|
||||
* by the Broad Institute to extract the lane information
|
||||
*
|
||||
* If it fails to find either of the two pieces of information, will return the read group id instead.
|
||||
* If the sample has a PU tag annotation, return that. If not, return the read group id.
|
||||
*
|
||||
* @param rg the read group record
|
||||
* @return sample.lane or id if information is missing.
|
||||
* @return platform unit or readgroup id
|
||||
*/
|
||||
private String readGroupValueFromRG(GATKSAMReadGroupRecord rg) {
|
||||
String lane = rg.getLane(); // take the sample's lane from the read group lane tag
|
||||
String sample = rg.getSample(); // take the sample's name from the read group sample tag
|
||||
String value = rg.getId(); // initialize the return value with the read group ID in case we can't find the sample or the lane
|
||||
|
||||
if (lane == null) { // if this bam doesn't have the lane annotation in the read group try to take it from the read group id
|
||||
String [] splitID = rg.getId().split(Pattern.quote("."));
|
||||
if (splitID.length > 1) // if the id doesn't follow the BROAD defined regex (PU.LANE), fall back to the read group id
|
||||
lane = splitID[splitID.length - 1]; // take the lane from the readgroup id
|
||||
}
|
||||
|
||||
if (sample != null && lane != null)
|
||||
value = sample + "." + lane; // the read group covariate is sample.lane (where the inforamtion is available)
|
||||
|
||||
return value;
|
||||
String platformUnit = rg.getPlatformUnit();
|
||||
return platformUnit == null ? rg.getId() : platformUnit;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -356,39 +356,65 @@ public class RecalDataManager {
|
|||
|
||||
private static void writeCSV(PrintStream deltaTableFile, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> map, String recalibrationMode, boolean printHeader) {
|
||||
final int QUALITY_SCORE_COVARIATE_INDEX = 1;
|
||||
final Map<BitSet, AccuracyDatum> deltaTable = new HashMap<BitSet, AccuracyDatum>();
|
||||
final Map<BitSet, RecalDatum> deltaTable = new HashMap<BitSet, RecalDatum>();
|
||||
BQSRKeyManager deltaKeyManager = null;
|
||||
|
||||
|
||||
for (Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> tableEntry : map.entrySet()) {
|
||||
BQSRKeyManager keyManager = tableEntry.getKey();
|
||||
|
||||
if (keyManager.getOptionalCovariates().size() > 0) { // only need the 'all covariates' table
|
||||
Map<BitSet, RecalDatum> table = tableEntry.getValue();
|
||||
|
||||
if (keyManager.getOptionalCovariates().size() > 0) { // initialize with the 'all covariates' table
|
||||
// create a key manager for the delta table
|
||||
List<Covariate> requiredCovariates = keyManager.getRequiredCovariates().subList(0, 1); // include the read group covariate as the only required covariate
|
||||
List<Covariate> optionalCovariates = keyManager.getRequiredCovariates().subList(1, 2); // include the quality score covariate as an optional covariate
|
||||
optionalCovariates.addAll(keyManager.getOptionalCovariates()); // include all optional covariates
|
||||
BQSRKeyManager deltaKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initialize the key manager
|
||||
deltaKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initialize the key manager
|
||||
}
|
||||
}
|
||||
|
||||
if (deltaKeyManager == null)
|
||||
throw new ReviewedStingException ("Couldn't find the covariates table");
|
||||
|
||||
// create delta table
|
||||
boolean readyToPrint = false;
|
||||
for (Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> tableEntry : map.entrySet()) {
|
||||
BQSRKeyManager keyManager = tableEntry.getKey();
|
||||
|
||||
if (keyManager.getRequiredCovariates().size() == 2 && keyManager.getOptionalCovariates().isEmpty()) { // look for the QualityScore table
|
||||
Map<BitSet, RecalDatum> table = tableEntry.getValue();
|
||||
|
||||
// add the quality score table to the delta table
|
||||
for (Map.Entry<BitSet, RecalDatum> entry : table.entrySet()) { // go through every element in the covariates table to create the delta table
|
||||
RecalDatum recalDatum = entry.getValue(); // the current element (recal datum)
|
||||
|
||||
List<Object> covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key
|
||||
List<Object> newCovs = new ArrayList<Object>(4);
|
||||
newCovs.add(0, covs.get(0)); // replace the covariate value with the quality score
|
||||
newCovs.add(1, covs.get(1));
|
||||
newCovs.add(2, "QualityScore"); // replace the covariate name with QualityScore (for the QualityScore covariate)
|
||||
newCovs.add(3, covs.get(2));
|
||||
BitSet deltaKey = deltaKeyManager.bitSetFromKey(newCovs.toArray()); // create a new bitset key for the delta table
|
||||
addToDeltaTable(deltaTable, deltaKey, recalDatum); // add this covariate to the delta table
|
||||
}
|
||||
}
|
||||
|
||||
else if (keyManager.getOptionalCovariates().size() > 0) { // look for the optional covariates table
|
||||
Map<BitSet, RecalDatum> table = tableEntry.getValue();
|
||||
|
||||
// add the optional covariates to the delta table
|
||||
for (Map.Entry<BitSet, RecalDatum> entry : table.entrySet()) { // go through every element in the covariates table to create the delta table
|
||||
RecalDatum recalDatum = entry.getValue(); // the current element (recal datum)
|
||||
|
||||
List<Object> covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key
|
||||
byte originalQuality = Byte.parseByte((String) covs.get(QUALITY_SCORE_COVARIATE_INDEX)); // save the original quality for accuracy calculation later on
|
||||
covs.remove(QUALITY_SCORE_COVARIATE_INDEX); // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS)
|
||||
BitSet deltaKey = deltaKeyManager.bitSetFromKey(covs.toArray()); // create a new bitset key for the delta table
|
||||
addToDeltaTable(deltaTable, deltaKey, recalDatum, originalQuality); // add this covariate to the delta table
|
||||
|
||||
covs.set(1, originalQuality); // replace the covariate value with the quality score
|
||||
covs.set(2, "QualityScore"); // replace the covariate name with QualityScore (for the QualityScore covariate)
|
||||
deltaKey = deltaKeyManager.bitSetFromKey(covs.toArray()); // create a new bitset key for the delta table
|
||||
addToDeltaTable(deltaTable, deltaKey, recalDatum, originalQuality); // add this covariate to the delta table
|
||||
addToDeltaTable(deltaTable, deltaKey, recalDatum); // add this covariate to the delta table
|
||||
}
|
||||
readyToPrint = true;
|
||||
}
|
||||
|
||||
// output the csv file
|
||||
if (readyToPrint) {
|
||||
|
||||
// print header
|
||||
if (printHeader) {
|
||||
List<String> header = new LinkedList<String>();
|
||||
header.add("ReadGroup");
|
||||
|
|
@ -405,11 +431,11 @@ public class RecalDataManager {
|
|||
}
|
||||
|
||||
// print each data line
|
||||
for(Map.Entry<BitSet, AccuracyDatum> deltaEntry : deltaTable.entrySet()) {
|
||||
for(Map.Entry<BitSet, RecalDatum> deltaEntry : deltaTable.entrySet()) {
|
||||
List<Object> deltaKeys = deltaKeyManager.keySetFrom(deltaEntry.getKey());
|
||||
RecalDatum deltaDatum = deltaEntry.getValue();
|
||||
deltaTableFile.print(Utils.join(",", deltaKeys));
|
||||
deltaTableFile.print("," + deltaDatum.toString());
|
||||
deltaTableFile.print("," + deltaDatum.stringForCSV());
|
||||
deltaTableFile.println("," + recalibrationMode);
|
||||
}
|
||||
|
||||
|
|
@ -419,21 +445,20 @@ public class RecalDataManager {
|
|||
}
|
||||
|
||||
/**
|
||||
* Updates the current AccuracyDatum element in the delta table.
|
||||
* Updates the current RecalDatum element in the delta table.
|
||||
*
|
||||
* If it doesn't have an element yet, it creates an AccuracyDatum element and adds it to the delta table.
|
||||
* If it doesn't have an element yet, it creates an RecalDatum element and adds it to the delta table.
|
||||
*
|
||||
* @param deltaTable the delta table
|
||||
* @param deltaKey the key to the table
|
||||
* @param recalDatum the recal datum to combine with the accuracyDatum element in the table
|
||||
* @param originalQuality the quality score to we can calculate the accuracy for the accuracyDatum element
|
||||
*/
|
||||
private static void addToDeltaTable(Map<BitSet, AccuracyDatum> deltaTable, BitSet deltaKey, RecalDatum recalDatum, byte originalQuality) {
|
||||
AccuracyDatum deltaDatum = deltaTable.get(deltaKey); // check if we already have a RecalDatum for this key
|
||||
private static void addToDeltaTable(Map<BitSet, RecalDatum> deltaTable, BitSet deltaKey, RecalDatum recalDatum) {
|
||||
RecalDatum deltaDatum = deltaTable.get(deltaKey); // check if we already have a RecalDatum for this key
|
||||
if (deltaDatum == null)
|
||||
deltaTable.put(deltaKey, new AccuracyDatum(recalDatum, originalQuality)); // if we don't have a key yet, create a new one with the same values as the curent datum
|
||||
deltaTable.put(deltaKey, new RecalDatum(recalDatum)); // if we don't have a key yet, create a new one with the same values as the curent datum
|
||||
else
|
||||
deltaDatum.combine(recalDatum, originalQuality); // if we do have a datum, combine it with this one.
|
||||
deltaDatum.combine(recalDatum); // if we do have a datum, combine it with this one.
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -109,6 +109,10 @@ public class RecalDatum extends Datum {
|
|||
return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()));
|
||||
}
|
||||
|
||||
public String stringForCSV() {
|
||||
return String.format("%s,%d,%.2f", toString(), (byte) Math.floor(getEstimatedQReported()), getEmpiricalQuality() - getEstimatedQReported());
|
||||
}
|
||||
|
||||
|
||||
private double calcExpectedErrors() {
|
||||
return (double) this.numObservations * qualToErrorProb(estimatedQReported);
|
||||
|
|
|
|||
|
|
@ -293,9 +293,6 @@ public class RecalibrationReport {
|
|||
else if (primaryKey.equals("no_plots"))
|
||||
RAC.NO_PLOTS = Boolean.parseBoolean((String) value);
|
||||
|
||||
else if (primaryKey.equals("no_plots"))
|
||||
RAC.NO_PLOTS = Boolean.parseBoolean((String) value);
|
||||
|
||||
else if (primaryKey.equals("recalibration_report"))
|
||||
RAC.recalibrationReport = (value == null) ? null : new File((String) value);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ import net.sf.samtools.SAMUtils;
|
|||
* @author Kiran Garimella
|
||||
*/
|
||||
public class QualityUtils {
|
||||
public final static byte MAX_RECALIBRATED_Q_SCORE = 50;
|
||||
public final static byte MAX_RECALIBRATED_Q_SCORE = 93;
|
||||
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
|
||||
public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE);
|
||||
|
||||
|
|
@ -104,9 +104,8 @@ public class QualityUtils {
|
|||
*/
|
||||
static public byte probToQual(double prob, double eps) {
|
||||
double lp = Math.round(-10.0*Math.log10(1.0 - prob + eps));
|
||||
byte b = boundQual((int)lp);
|
||||
//System.out.printf("LP is %f, byte is %d%n", lp, b);
|
||||
return b;
|
||||
return boundQual((int)lp);
|
||||
}
|
||||
|
||||
static public double phredScaleCorrectRate(double trueRate) {
|
||||
|
|
@ -117,10 +116,6 @@ public class QualityUtils {
|
|||
return Math.abs(-10.0*Math.log10(errorRate));
|
||||
}
|
||||
|
||||
static public double lodToPhredScaleErrorRate(double lod) {
|
||||
return phredScaleErrorRate(1.0 / (Math.pow(10.0, lod) + 1.0));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a quality score, capped at max qual.
|
||||
*
|
||||
|
|
@ -134,12 +129,11 @@ public class QualityUtils {
|
|||
/**
|
||||
* Returns an integer quality score bounded by 1 - maxQual.
|
||||
*
|
||||
* @param qual
|
||||
* @param maxQual
|
||||
* @return
|
||||
* @param qual the quality score
|
||||
* @param maxQual the maximum quality
|
||||
* @return the integer betwen 1 and maxqual.
|
||||
*/
|
||||
static public byte boundQual(int qual, byte maxQual) {
|
||||
//return (byte) Math.min(qual, maxQual);
|
||||
return (byte) Math.max(Math.min(qual, maxQual), 1);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,27 +28,17 @@ public class ReadGroupCovariateUnitTest {
|
|||
public void testSingleRecord() {
|
||||
final String expected = "SAMPLE.1";
|
||||
GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("MY.ID");
|
||||
rg.setSample("SAMPLE");
|
||||
rg.setLane("1");
|
||||
rg.setPlatformUnit(expected);
|
||||
runTest(rg, expected);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMissingLane() {
|
||||
final String expected = "SAMPLE.7";
|
||||
GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("MY.7");
|
||||
rg.setSample("SAMPLE");
|
||||
public void testMissingPlatformUnit() {
|
||||
final String expected = "MY.7";
|
||||
GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(expected);
|
||||
runTest(rg, expected);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMissingSample() {
|
||||
final String expected = "MY.ID";
|
||||
GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("MY.ID");
|
||||
rg.setLane("1");
|
||||
runTest(rg, expected);
|
||||
}
|
||||
|
||||
|
||||
private void runTest(GATKSAMReadGroupRecord rg, String expected) {
|
||||
GATKSAMRecord read = ReadUtils.createRandomRead(10);
|
||||
read.setReadGroup(rg);
|
||||
|
|
|
|||
Loading…
Reference in New Issue