BQSR triage and test routines
* updated BQSR queue script for faster turnaround * implemented plot generation for scatter/gatherered runs * adjusted output file names to be cooperative with the queue script * added the recalibration report file to the argument table in the report * added ReadCovariates unit test -- guarantees that all the covariates are being generated for every base in the read * added RecalibrationReport unit test -- guarantees the integrity of the delta tables
This commit is contained in:
parent
a733723439
commit
e39a59594a
|
|
@ -65,6 +65,14 @@ public class BQSRGatherer extends Gatherer {
|
||||||
if (generalReport == null)
|
if (generalReport == null)
|
||||||
throw new ReviewedStingException(EMPTY_INPUT_LIST);
|
throw new ReviewedStingException(EMPTY_INPUT_LIST);
|
||||||
|
|
||||||
|
RecalibrationArgumentCollection RAC = generalReport.getRAC();
|
||||||
|
if (RAC.recalibrationReport != null) {
|
||||||
|
RecalibrationReport originalReport = new RecalibrationReport(RAC.recalibrationReport);
|
||||||
|
RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, originalReport.getKeysAndTablesMap(), generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
RecalDataManager.generateRecalibrationPlot(RAC.RECAL_FILE, generalReport.getKeysAndTablesMap(), RAC.KEEP_INTERMEDIATE_FILES);
|
||||||
|
|
||||||
generalReport.calculateEmpiricalAndQuantizedQualities();
|
generalReport.calculateEmpiricalAndQuantizedQualities();
|
||||||
generalReport.output(outputFile);
|
generalReport.output(outputFile);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -277,7 +277,42 @@ public class BQSRKeyManager {
|
||||||
bitSet.and(mask);
|
bitSet.and(mask);
|
||||||
return chopNBitsFrom(bitSet, leadingBits);
|
return chopNBitsFrom(bitSet, leadingBits);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (!(o instanceof BQSRKeyManager))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
BQSRKeyManager other = (BQSRKeyManager) o;
|
||||||
|
if (this == other)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (requiredCovariates.size() != other.requiredCovariates.size() || optionalCovariates.size() != other.optionalCovariates.size())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
Iterator<RequiredCovariateInfo> otherRequiredIterator = other.requiredCovariates.iterator();
|
||||||
|
for (RequiredCovariateInfo thisInfo: requiredCovariates) {
|
||||||
|
RequiredCovariateInfo otherInfo = otherRequiredIterator.next();
|
||||||
|
|
||||||
|
String thisName = thisInfo.covariate.getClass().getSimpleName();
|
||||||
|
String otherName = otherInfo.covariate.getClass().getSimpleName();
|
||||||
|
if (!thisName.equals(otherName))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
Iterator<OptionalCovariateInfo> otherOptionalIterator = other.optionalCovariates.iterator();
|
||||||
|
for (OptionalCovariateInfo thisInfo : optionalCovariates) {
|
||||||
|
OptionalCovariateInfo otherInfo = otherOptionalIterator.next();
|
||||||
|
String thisName = thisInfo.covariate.getClass().getSimpleName();
|
||||||
|
String otherName = otherInfo.covariate.getClass().getSimpleName();
|
||||||
|
if (!thisName.equals(otherName))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Aggregate information for each Covariate
|
* Aggregate information for each Covariate
|
||||||
*/
|
*/
|
||||||
|
|
@ -292,7 +327,7 @@ public class BQSRKeyManager {
|
||||||
this.covariate = covariate;
|
this.covariate = covariate;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class OptionalCovariateInfo {
|
class OptionalCovariateInfo {
|
||||||
public final BitSet covariateID; // cache the covariate ID
|
public final BitSet covariateID; // cache the covariate ID
|
||||||
public final Covariate covariate;
|
public final Covariate covariate;
|
||||||
|
|
|
||||||
|
|
@ -55,6 +55,11 @@ public class Datum {
|
||||||
numMismatches = 0L;
|
numMismatches = 0L;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Datum(long numObservations, long numMismatches) {
|
||||||
|
this.numObservations = numObservations;
|
||||||
|
this.numMismatches = numMismatches;
|
||||||
|
}
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// increment methods
|
// increment methods
|
||||||
|
|
@ -90,4 +95,11 @@ public class Datum {
|
||||||
return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
|
return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (!(o instanceof Datum))
|
||||||
|
return false;
|
||||||
|
Datum other = (Datum) o;
|
||||||
|
return numMismatches == other.numMismatches && numObservations == other.numObservations;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -62,4 +62,19 @@ public class ReadCovariates {
|
||||||
for (int i = 0; i < covariateValues.length; i++)
|
for (int i = 0; i < covariateValues.length; i++)
|
||||||
keySet[i][nextCovariateIndex] = covariateValues[i];
|
keySet[i][nextCovariateIndex] = covariateValues[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Testing routines
|
||||||
|
*/
|
||||||
|
protected BitSet[][] getMismatchesKeySet() {
|
||||||
|
return mismatchesKeySet;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected BitSet[][] getInsertionsKeySet() {
|
||||||
|
return insertionsKeySet;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected BitSet[][] getDeletionsKeySet() {
|
||||||
|
return deletionsKeySet;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,22 +25,24 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
|
|
||||||
import net.sf.samtools.SAMUtils;
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|
||||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.R.RScriptExecutor;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.io.Resource;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -74,11 +76,13 @@ public class RecalDataManager {
|
||||||
public final static String NUMBER_OBSERVATIONS_COLUMN_NAME = "Observations";
|
public final static String NUMBER_OBSERVATIONS_COLUMN_NAME = "Observations";
|
||||||
public final static String NUMBER_ERRORS_COLUMN_NAME = "Errors";
|
public final static String NUMBER_ERRORS_COLUMN_NAME = "Errors";
|
||||||
|
|
||||||
private final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
|
|
||||||
private final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
|
private final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
|
||||||
private final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
|
private final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
|
||||||
private static boolean warnUserNullPlatform = false;
|
private static boolean warnUserNullPlatform = false;
|
||||||
|
|
||||||
|
private static final String SCRIPT_FILE = "BQSR.R";
|
||||||
|
|
||||||
|
|
||||||
public enum SOLID_RECAL_MODE {
|
public enum SOLID_RECAL_MODE {
|
||||||
/**
|
/**
|
||||||
* Treat reference inserted bases as reference matching bases. Very unsafe!
|
* Treat reference inserted bases as reference matching bases. Very unsafe!
|
||||||
|
|
@ -309,6 +313,130 @@ public class RecalDataManager {
|
||||||
report.print(outputFile);
|
report.print(outputFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static Pair<PrintStream, File> initializeRecalibrationPlot(File filename) {
|
||||||
|
final PrintStream deltaTableStream;
|
||||||
|
final File deltaTableFileName = new File(filename + ".csv");
|
||||||
|
try {
|
||||||
|
deltaTableStream = new PrintStream(deltaTableFileName);
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
throw new UserException.CouldNotCreateOutputFile(deltaTableFileName, "File " + deltaTableFileName + " could not be created");
|
||||||
|
}
|
||||||
|
return new Pair<PrintStream, File>(deltaTableStream, deltaTableFileName);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void outputRecalibrationPlot(Pair<PrintStream, File> files, boolean keepIntermediates) {
|
||||||
|
final File csvFileName = files.getSecond();
|
||||||
|
final File plotFileName = new File(csvFileName + ".pdf");
|
||||||
|
files.getFirst().close();
|
||||||
|
|
||||||
|
RScriptExecutor executor = new RScriptExecutor();
|
||||||
|
executor.addScript(new Resource(SCRIPT_FILE, RecalDataManager.class));
|
||||||
|
executor.addArgs(csvFileName.getAbsolutePath());
|
||||||
|
executor.addArgs(plotFileName.getAbsolutePath());
|
||||||
|
executor.exec();
|
||||||
|
|
||||||
|
if (!keepIntermediates)
|
||||||
|
if (!csvFileName.delete())
|
||||||
|
throw new ReviewedStingException("Could not find file " + csvFileName.getAbsolutePath());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void generateRecalibrationPlot(File filename, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> original, boolean keepIntermediates) {
|
||||||
|
Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
|
||||||
|
writeCSV(files.getFirst(), original, "ORIGINAL", true);
|
||||||
|
outputRecalibrationPlot(files, keepIntermediates);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void generateRecalibrationPlot(File filename, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> original, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> recalibrated, boolean keepIntermediates) {
|
||||||
|
Pair<PrintStream, File> files = initializeRecalibrationPlot(filename);
|
||||||
|
writeCSV(files.getFirst(), recalibrated, "RECALIBRATED", true);
|
||||||
|
writeCSV(files.getFirst(), original, "ORIGINAL", false);
|
||||||
|
outputRecalibrationPlot(files, keepIntermediates);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void writeCSV(PrintStream deltaTableFile, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> map, String recalibrationMode, boolean printHeader) {
|
||||||
|
final int QUALITY_SCORE_COVARIATE_INDEX = 1;
|
||||||
|
final Map<BitSet, AccuracyDatum> deltaTable = new HashMap<BitSet, AccuracyDatum>();
|
||||||
|
|
||||||
|
|
||||||
|
for (Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> tableEntry : map.entrySet()) {
|
||||||
|
BQSRKeyManager keyManager = tableEntry.getKey();
|
||||||
|
|
||||||
|
if (keyManager.getOptionalCovariates().size() > 0) { // only need the 'all covariates' table
|
||||||
|
Map<BitSet, RecalDatum> table = tableEntry.getValue();
|
||||||
|
|
||||||
|
// create a key manager for the delta table
|
||||||
|
List<Covariate> requiredCovariates = keyManager.getRequiredCovariates().subList(0, 1); // include the read group covariate as the only required covariate
|
||||||
|
List<Covariate> optionalCovariates = keyManager.getRequiredCovariates().subList(1, 2); // include the quality score covariate as an optional covariate
|
||||||
|
optionalCovariates.addAll(keyManager.getOptionalCovariates()); // include all optional covariates
|
||||||
|
BQSRKeyManager deltaKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates); // initialize the key manager
|
||||||
|
|
||||||
|
|
||||||
|
// create delta table
|
||||||
|
for (Map.Entry<BitSet, RecalDatum> entry : table.entrySet()) { // go through every element in the covariates table to create the delta table
|
||||||
|
RecalDatum recalDatum = entry.getValue(); // the current element (recal datum)
|
||||||
|
|
||||||
|
List<Object> covs = keyManager.keySetFrom(entry.getKey()); // extract the key objects from the bitset key
|
||||||
|
byte originalQuality = Byte.parseByte((String) covs.get(QUALITY_SCORE_COVARIATE_INDEX)); // save the original quality for accuracy calculation later on
|
||||||
|
covs.remove(QUALITY_SCORE_COVARIATE_INDEX); // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS)
|
||||||
|
BitSet deltaKey = deltaKeyManager.bitSetFromKey(covs.toArray()); // create a new bitset key for the delta table
|
||||||
|
addToDeltaTable(deltaTable, deltaKey, recalDatum, originalQuality); // add this covariate to the delta table
|
||||||
|
|
||||||
|
covs.set(1, originalQuality); // replace the covariate value with the quality score
|
||||||
|
covs.set(2, "QualityScore"); // replace the covariate name with QualityScore (for the QualityScore covariate)
|
||||||
|
deltaKey = deltaKeyManager.bitSetFromKey(covs.toArray()); // create a new bitset key for the delta table
|
||||||
|
addToDeltaTable(deltaTable, deltaKey, recalDatum, originalQuality); // add this covariate to the delta table
|
||||||
|
}
|
||||||
|
|
||||||
|
// print header
|
||||||
|
if (printHeader) {
|
||||||
|
List<String> header = new LinkedList<String>();
|
||||||
|
header.add("ReadGroup");
|
||||||
|
header.add("CovariateValue");
|
||||||
|
header.add("CovariateName");
|
||||||
|
header.add("EventType");
|
||||||
|
header.add("Observations");
|
||||||
|
header.add("Errors");
|
||||||
|
header.add("EmpiricalQuality");
|
||||||
|
header.add("AverageReportedQuality");
|
||||||
|
header.add("Accuracy");
|
||||||
|
header.add("Recalibration");
|
||||||
|
deltaTableFile.println(Utils.join(",", header));
|
||||||
|
}
|
||||||
|
|
||||||
|
// print each data line
|
||||||
|
for(Map.Entry<BitSet, AccuracyDatum> deltaEntry : deltaTable.entrySet()) {
|
||||||
|
List<Object> deltaKeys = deltaKeyManager.keySetFrom(deltaEntry.getKey());
|
||||||
|
RecalDatum deltaDatum = deltaEntry.getValue();
|
||||||
|
deltaTableFile.print(Utils.join(",", deltaKeys));
|
||||||
|
deltaTableFile.print("," + deltaDatum.toString());
|
||||||
|
deltaTableFile.println("," + recalibrationMode);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Updates the current AccuracyDatum element in the delta table.
|
||||||
|
*
|
||||||
|
* If it doesn't have an element yet, it creates an AccuracyDatum element and adds it to the delta table.
|
||||||
|
*
|
||||||
|
* @param deltaTable the delta table
|
||||||
|
* @param deltaKey the key to the table
|
||||||
|
* @param recalDatum the recal datum to combine with the accuracyDatum element in the table
|
||||||
|
* @param originalQuality the quality score to we can calculate the accuracy for the accuracyDatum element
|
||||||
|
*/
|
||||||
|
private static void addToDeltaTable(Map<BitSet, AccuracyDatum> deltaTable, BitSet deltaKey, RecalDatum recalDatum, byte originalQuality) {
|
||||||
|
AccuracyDatum deltaDatum = deltaTable.get(deltaKey); // check if we already have a RecalDatum for this key
|
||||||
|
if (deltaDatum == null)
|
||||||
|
deltaTable.put(deltaKey, new AccuracyDatum(recalDatum, originalQuality)); // if we don't have a key yet, create a new one with the same values as the curent datum
|
||||||
|
else
|
||||||
|
deltaDatum.combine(recalDatum, originalQuality); // if we do have a datum, combine it with this one.
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Section of code shared between the two recalibration walkers which uses the command line arguments to adjust attributes of the read such as quals or platform string
|
* Section of code shared between the two recalibration walkers which uses the command line arguments to adjust attributes of the read such as quals or platform string
|
||||||
*
|
*
|
||||||
|
|
@ -382,127 +510,6 @@ public class RecalDataManager {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Perform the SET_Q_ZERO solid recalibration. Inconsistent color space bases and their previous base are set to quality zero
|
|
||||||
*
|
|
||||||
* @param read The SAMRecord to recalibrate
|
|
||||||
* @param readBases The bases in the read which have been RC'd if necessary
|
|
||||||
* @param inconsistency The array of 1/0 that says if this base is inconsistent with its color
|
|
||||||
* @param originalQualScores The array of original quality scores to set to zero if needed
|
|
||||||
* @param refBases The reference which has been RC'd if necessary
|
|
||||||
* @param setBaseN Should we also set the base to N as well as quality zero in order to visualize in IGV or something similar
|
|
||||||
* @return The byte array of original quality scores some of which might have been set to zero
|
|
||||||
*/
|
|
||||||
private static byte[] solidRecalSetToQZero(final GATKSAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] originalQualScores, final byte[] refBases, final boolean setBaseN) {
|
|
||||||
|
|
||||||
final boolean negStrand = read.getReadNegativeStrandFlag();
|
|
||||||
for (int iii = 1; iii < originalQualScores.length; iii++) {
|
|
||||||
if (inconsistency[iii] == 1) {
|
|
||||||
if (readBases[iii] == refBases[iii]) {
|
|
||||||
if (negStrand) {
|
|
||||||
originalQualScores[originalQualScores.length - (iii + 1)] = (byte) 0;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
originalQualScores[iii] = (byte) 0;
|
|
||||||
}
|
|
||||||
if (setBaseN) {
|
|
||||||
readBases[iii] = (byte) 'N';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Set the prev base to Q0 as well
|
|
||||||
if (readBases[iii - 1] == refBases[iii - 1]) {
|
|
||||||
if (negStrand) {
|
|
||||||
originalQualScores[originalQualScores.length - iii] = (byte) 0;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
originalQualScores[iii - 1] = (byte) 0;
|
|
||||||
}
|
|
||||||
if (setBaseN) {
|
|
||||||
readBases[iii - 1] = (byte) 'N';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (negStrand) {
|
|
||||||
readBases = BaseUtils.simpleReverseComplement(readBases.clone()); // Put the bases back in reverse order to stuff them back in the read
|
|
||||||
}
|
|
||||||
read.setReadBases(readBases);
|
|
||||||
|
|
||||||
return originalQualScores;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Peform the REMOVE_REF_BIAS solid recalibration. Look at the color space qualities and probabilistically decide if the base should be change to match the color or left as reference
|
|
||||||
*
|
|
||||||
* @param read The SAMRecord to recalibrate
|
|
||||||
* @param readBases The bases in the read which have been RC'd if necessary
|
|
||||||
* @param inconsistency The array of 1/0 that says if this base is inconsistent with its color
|
|
||||||
* @param colorImpliedBases The bases implied by the color space, RC'd if necessary
|
|
||||||
* @param refBases The reference which has been RC'd if necessary
|
|
||||||
*/
|
|
||||||
private static void solidRecalRemoveRefBias(final GATKSAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] colorImpliedBases, final byte[] refBases) {
|
|
||||||
|
|
||||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG);
|
|
||||||
if (attr != null) {
|
|
||||||
byte[] colorSpaceQuals;
|
|
||||||
if (attr instanceof String) {
|
|
||||||
String x = (String) attr;
|
|
||||||
colorSpaceQuals = x.getBytes();
|
|
||||||
SAMUtils.fastqToPhred(colorSpaceQuals);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG, read.getReadName()));
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int iii = 1; iii < inconsistency.length - 1; iii++) {
|
|
||||||
if (inconsistency[iii] == 1) {
|
|
||||||
for (int jjj = iii - 1; jjj <= iii; jjj++) { // Correct this base and the one before it along the direction of the read
|
|
||||||
if (jjj == iii || inconsistency[jjj] == 0) { // Don't want to correct the previous base a second time if it was already corrected in the previous step
|
|
||||||
if (readBases[jjj] == refBases[jjj]) {
|
|
||||||
if (colorSpaceQuals[jjj] == colorSpaceQuals[jjj + 1]) { // Equal evidence for the color implied base and the reference base, so flip a coin
|
|
||||||
final int rand = GenomeAnalysisEngine.getRandomGenerator().nextInt(2);
|
|
||||||
if (rand == 0) { // The color implied base won the coin flip
|
|
||||||
readBases[jjj] = colorImpliedBases[jjj];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
final int maxQuality = Math.max((int) colorSpaceQuals[jjj], (int) colorSpaceQuals[jjj + 1]);
|
|
||||||
final int minQuality = Math.min((int) colorSpaceQuals[jjj], (int) colorSpaceQuals[jjj + 1]);
|
|
||||||
int diffInQuality = maxQuality - minQuality;
|
|
||||||
int numLow = minQuality;
|
|
||||||
if (numLow == 0) {
|
|
||||||
numLow++;
|
|
||||||
diffInQuality++;
|
|
||||||
}
|
|
||||||
final int numHigh = Math.round(numLow * (float) Math.pow(10.0f, (float) diffInQuality / 10.0f)); // The color with higher quality is exponentially more likely
|
|
||||||
final int rand = GenomeAnalysisEngine.getRandomGenerator().nextInt(numLow + numHigh);
|
|
||||||
if (rand >= numLow) { // higher q score won
|
|
||||||
if (maxQuality == (int) colorSpaceQuals[jjj]) {
|
|
||||||
readBases[jjj] = colorImpliedBases[jjj];
|
|
||||||
} // else ref color had higher q score, and won out, so nothing to do here
|
|
||||||
}
|
|
||||||
else { // lower q score won
|
|
||||||
if (minQuality == (int) colorSpaceQuals[jjj]) {
|
|
||||||
readBases[jjj] = colorImpliedBases[jjj];
|
|
||||||
} // else ref color had lower q score, and won out, so nothing to do here
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (read.getReadNegativeStrandFlag()) {
|
|
||||||
readBases = BaseUtils.simpleReverseComplement(readBases.clone()); // Put the bases back in reverse order to stuff them back in the read
|
|
||||||
}
|
|
||||||
read.setReadBases(readBases);
|
|
||||||
}
|
|
||||||
else { // No color space quality tag in file
|
|
||||||
throw new UserException.MalformedBAM(read, "REMOVE_REF_BIAS recal mode requires color space qualities but they can't be found for read: " + read.getReadName());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given the base and the color calculate the next base in the sequence
|
* Given the base and the color calculate the next base in the sequence
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,10 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
* User: rpoplin
|
* User: rpoplin
|
||||||
|
|
@ -114,4 +118,29 @@ public class RecalDatum extends Datum {
|
||||||
return Math.pow(10.0, qual / -10.0);
|
return Math.pow(10.0, qual / -10.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
|
||||||
|
Random random = new Random();
|
||||||
|
int nObservations = random.nextInt(maxObservations);
|
||||||
|
int nErrors = random.nextInt(maxErrors);
|
||||||
|
Datum datum = new Datum(nObservations, nErrors);
|
||||||
|
double empiricalQuality = datum.empiricalQualDouble();
|
||||||
|
double estimatedQReported = empiricalQuality + ((10 * random.nextDouble()) - 5); // empirical quality +/- 5.
|
||||||
|
return new RecalDatum(nObservations, nErrors, estimatedQReported, empiricalQuality);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* We don't compare the estimated quality reported because it may be different when read from
|
||||||
|
* report tables.
|
||||||
|
*
|
||||||
|
* @param o the other recal datum
|
||||||
|
* @return true if the two recal datums have the same number of observations, errors and empirical quality.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (!(o instanceof RecalDatum))
|
||||||
|
return false;
|
||||||
|
RecalDatum other = (RecalDatum) o;
|
||||||
|
return super.equals(o) &&
|
||||||
|
MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -170,6 +170,8 @@ public class RecalibrationArgumentCollection {
|
||||||
@Argument(fullName = "no_plots", shortName = "np", required = false, doc = "does not generate any plots -- useful for queue scatter/gathering")
|
@Argument(fullName = "no_plots", shortName = "np", required = false, doc = "does not generate any plots -- useful for queue scatter/gathering")
|
||||||
public boolean NO_PLOTS = false;
|
public boolean NO_PLOTS = false;
|
||||||
|
|
||||||
|
public File recalibrationReport = null;
|
||||||
|
|
||||||
public GATKReportTable generateReportTable() {
|
public GATKReportTable generateReportTable() {
|
||||||
GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run");
|
GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run");
|
||||||
argumentsTable.addPrimaryKey("Argument");
|
argumentsTable.addPrimaryKey("Argument");
|
||||||
|
|
@ -190,6 +192,7 @@ public class RecalibrationArgumentCollection {
|
||||||
argumentsTable.set("quantizing_levels", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
|
argumentsTable.set("quantizing_levels", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
|
||||||
argumentsTable.set("keep_intermediate_files", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
|
argumentsTable.set("keep_intermediate_files", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES);
|
||||||
argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
|
argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS);
|
||||||
|
argumentsTable.set("recalibration_report", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, recalibrationReport == null ? "null" : recalibrationReport.getAbsolutePath());
|
||||||
return argumentsTable;
|
return argumentsTable;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -73,19 +73,26 @@ public class RecalibrationReport {
|
||||||
keysAndTablesMap.put(keyManager, table);
|
keysAndTablesMap.put(keyManager, table);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected RecalibrationReport(QuantizationInfo quantizationInfo, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap, GATKReportTable argumentTable, RecalibrationArgumentCollection RAC) {
|
||||||
|
this.quantizationInfo = quantizationInfo;
|
||||||
|
this.keysAndTablesMap = keysAndTablesMap;
|
||||||
|
this.argumentTable = argumentTable;
|
||||||
|
this.RAC = RAC;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Combines two recalibration reports by adding all observations and errors
|
* Combines two recalibration reports by adding all observations and errors
|
||||||
*
|
*
|
||||||
* Note: This method DOES NOT recalculate the empirical qualities and quantized qualities. You have to recalculate
|
* Note: This method DOES NOT recalculate the empirical qualities and quantized qualities. You have to recalculate
|
||||||
* them after combining. The reason for not calculating it is because this function is inteded for combining a
|
* them after combining. The reason for not calculating it is because this function is inteded for combining a
|
||||||
* series of recalibration reports, and it only makes sense to calculate the empirical qualities and quantized
|
* series of recalibration reports, and it only makes sense to calculate the empirical qualities and quantized
|
||||||
* qualities after all the recalibration reports have been combined. Having the user recalculate when appropriate,
|
* qualities after all the recalibration reports have been combined. Having the user recalculate when appropriate,
|
||||||
* makes this method faster
|
* makes this method faster
|
||||||
*
|
*
|
||||||
* Note2: The empirical quality reported, however, is recalculated given its simplicity.
|
* Note2: The empirical quality reported, however, is recalculated given its simplicity.
|
||||||
*
|
*
|
||||||
* @param other the recalibration report to combine with this one
|
* @param other the recalibration report to combine with this one
|
||||||
*/
|
*/
|
||||||
public void combine(RecalibrationReport other) {
|
public void combine(RecalibrationReport other) {
|
||||||
Iterator<Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>>> thisIterator = keysAndTablesMap.entrySet().iterator();
|
Iterator<Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>>> thisIterator = keysAndTablesMap.entrySet().iterator();
|
||||||
|
|
||||||
|
|
@ -285,6 +292,12 @@ public class RecalibrationReport {
|
||||||
|
|
||||||
else if (primaryKey.equals("no_plots"))
|
else if (primaryKey.equals("no_plots"))
|
||||||
RAC.NO_PLOTS = Boolean.parseBoolean((String) value);
|
RAC.NO_PLOTS = Boolean.parseBoolean((String) value);
|
||||||
|
|
||||||
|
else if (primaryKey.equals("no_plots"))
|
||||||
|
RAC.NO_PLOTS = Boolean.parseBoolean((String) value);
|
||||||
|
|
||||||
|
else if (primaryKey.equals("recalibration_report"))
|
||||||
|
RAC.recalibrationReport = (value == null) ? null : new File((String) value);
|
||||||
}
|
}
|
||||||
|
|
||||||
return RAC;
|
return RAC;
|
||||||
|
|
@ -305,4 +318,45 @@ public class RecalibrationReport {
|
||||||
public void output(PrintStream output) {
|
public void output(PrintStream output) {
|
||||||
RecalDataManager.outputRecalibrationReport(argumentTable, quantizationInfo, keysAndTablesMap, output);
|
RecalDataManager.outputRecalibrationReport(argumentTable, quantizationInfo, keysAndTablesMap, output);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public RecalibrationArgumentCollection getRAC() {
|
||||||
|
return RAC;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (!(o instanceof RecalibrationReport))
|
||||||
|
return false;
|
||||||
|
RecalibrationReport other = (RecalibrationReport) o;
|
||||||
|
if (this == o)
|
||||||
|
return true;
|
||||||
|
return isEqualTable(this.keysAndTablesMap, other.keysAndTablesMap);
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isEqualTable(LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> t1, LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> t2) {
|
||||||
|
if (t1.size() != t2.size())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
Iterator<Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>>> t1Iterator = t1.entrySet().iterator();
|
||||||
|
Iterator<Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>>> t2Iterator = t2.entrySet().iterator();
|
||||||
|
|
||||||
|
while (t1Iterator.hasNext() && t2Iterator.hasNext()) {
|
||||||
|
Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> t1MapEntry = t1Iterator.next();
|
||||||
|
Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> t2MapEntry = t2Iterator.next();
|
||||||
|
|
||||||
|
if (!(t1MapEntry.getKey().equals(t2MapEntry.getKey())))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
Map<BitSet, RecalDatum> table2 = t2MapEntry.getValue();
|
||||||
|
for (Map.Entry<BitSet, RecalDatum> t1TableEntry : t1MapEntry.getValue().entrySet()) {
|
||||||
|
BitSet t1Key = t1TableEntry.getKey();
|
||||||
|
if (!table2.containsKey(t1Key))
|
||||||
|
return false;
|
||||||
|
RecalDatum t1Datum = t1TableEntry.getValue();
|
||||||
|
if (!t1Datum.equals(table2.get(t1Key)))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -738,8 +738,12 @@ public class ReadUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static GATKSAMRecord createRandomRead(int length) {
|
public static GATKSAMRecord createRandomRead(int length) {
|
||||||
|
return createRandomRead(length, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static GATKSAMRecord createRandomRead(int length, boolean allowNs) {
|
||||||
byte[] quals = ReadUtils.createRandomReadQuals(length);
|
byte[] quals = ReadUtils.createRandomReadQuals(length);
|
||||||
byte[] bbases = ReadUtils.createRandomReadBases(length, true);
|
byte[] bbases = ReadUtils.createRandomReadBases(length, allowNs);
|
||||||
return ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M");
|
return ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -31,24 +31,29 @@ public class ContextCovariateUnitTest {
|
||||||
GATKSAMRecord read = ReadUtils.createRandomRead(1000);
|
GATKSAMRecord read = ReadUtils.createRandomRead(1000);
|
||||||
GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, RAC.LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS);
|
GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, RAC.LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS);
|
||||||
CovariateValues values = covariate.getValues(read);
|
CovariateValues values = covariate.getValues(read);
|
||||||
verifyCovariateArray(values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, stringFrom(clippedRead.getReadBases()));
|
verifyCovariateArray(values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, clippedRead, covariate);
|
||||||
verifyCovariateArray(values.getInsertions(), RAC.INSERTIONS_CONTEXT_SIZE, stringFrom(clippedRead.getReadBases()));
|
verifyCovariateArray(values.getInsertions(), RAC.INSERTIONS_CONTEXT_SIZE, clippedRead, covariate);
|
||||||
verifyCovariateArray(values.getDeletions(), RAC.DELETIONS_CONTEXT_SIZE, stringFrom(clippedRead.getReadBases()));
|
verifyCovariateArray(values.getDeletions(), RAC.DELETIONS_CONTEXT_SIZE, clippedRead, covariate);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void verifyCovariateArray(BitSet[] values, int contextSize, String bases) {
|
public static void verifyCovariateArray(BitSet[] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) {
|
||||||
for (int i = 0; i < values.length; i++) {
|
for (int i = 0; i < values.length; i++)
|
||||||
String expectedContext = null;
|
Assert.assertEquals(contextCovariate.keyFromBitSet(values[i]), expectedContext(read, i, contextSize));
|
||||||
if (i - contextSize + 1 >= 0) {
|
|
||||||
String context = bases.substring(i - contextSize + 1, i + 1);
|
}
|
||||||
if (!context.contains("N"))
|
|
||||||
expectedContext = context;
|
public static String expectedContext (GATKSAMRecord read, int offset, int contextSize) {
|
||||||
}
|
final String bases = stringFrom(read.getReadBases());
|
||||||
Assert.assertEquals(covariate.keyFromBitSet(values[i]), expectedContext);
|
String expectedContext = null;
|
||||||
|
if (offset - contextSize + 1 >= 0) {
|
||||||
|
String context = bases.substring(offset - contextSize + 1, offset + 1);
|
||||||
|
if (!context.contains("N"))
|
||||||
|
expectedContext = context;
|
||||||
}
|
}
|
||||||
|
return expectedContext;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String stringFrom(byte[] array) {
|
private static String stringFrom(byte[] array) {
|
||||||
String s = "";
|
String s = "";
|
||||||
for (byte value : array)
|
for (byte value : array)
|
||||||
s += (char) value;
|
s += (char) value;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,78 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author carneiro
|
||||||
|
* @since 4/21/12
|
||||||
|
*/
|
||||||
|
public class ReadCovariatesUnitTest {
|
||||||
|
|
||||||
|
@Test(enabled = true)
|
||||||
|
public void testCovariateGeneration() {
|
||||||
|
final String RGID = "id";
|
||||||
|
final int length = 10;
|
||||||
|
final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||||
|
GATKSAMRecord read = ReadUtils.createRandomRead(length, false);
|
||||||
|
GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(RGID);
|
||||||
|
rg.setPlatform("illumina");
|
||||||
|
read.setReadGroup(rg);
|
||||||
|
final byte[] mQuals = read.getBaseQualities(EventType.BASE_SUBSTITUTION);
|
||||||
|
final byte[] iQuals = read.getBaseQualities(EventType.BASE_INSERTION);
|
||||||
|
final byte[] dQuals = read.getBaseQualities(EventType.BASE_DELETION);
|
||||||
|
|
||||||
|
ReadGroupCovariate rgCov = new ReadGroupCovariate();
|
||||||
|
QualityScoreCovariate qsCov = new QualityScoreCovariate();
|
||||||
|
ContextCovariate coCov = new ContextCovariate();
|
||||||
|
CycleCovariate cyCov = new CycleCovariate();
|
||||||
|
|
||||||
|
rgCov.initialize(RAC);
|
||||||
|
qsCov.initialize(RAC);
|
||||||
|
coCov.initialize(RAC);
|
||||||
|
cyCov.initialize(RAC);
|
||||||
|
|
||||||
|
List<Covariate> requestedCovariates = new ArrayList<Covariate>(4);
|
||||||
|
requestedCovariates.add(rgCov);
|
||||||
|
requestedCovariates.add(qsCov);
|
||||||
|
requestedCovariates.add(coCov);
|
||||||
|
requestedCovariates.add(cyCov);
|
||||||
|
|
||||||
|
ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates);
|
||||||
|
|
||||||
|
// check that the length is correct
|
||||||
|
Assert.assertEquals(rc.getMismatchesKeySet().length, length);
|
||||||
|
Assert.assertEquals(rc.getInsertionsKeySet().length, length);
|
||||||
|
Assert.assertEquals(rc.getDeletionsKeySet().length, length);
|
||||||
|
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
// check that read group is always the same
|
||||||
|
Assert.assertEquals(rgCov.keyFromBitSet(rc.getMismatchesKeySet(i)[0]), RGID);
|
||||||
|
Assert.assertEquals(rgCov.keyFromBitSet(rc.getInsertionsKeySet(i)[0]), RGID);
|
||||||
|
Assert.assertEquals(rgCov.keyFromBitSet(rc.getDeletionsKeySet(i)[0]), RGID);
|
||||||
|
|
||||||
|
// check quality score
|
||||||
|
Assert.assertEquals(qsCov.keyFromBitSet(rc.getMismatchesKeySet(i)[1]), "" + mQuals[i]);
|
||||||
|
Assert.assertEquals(qsCov.keyFromBitSet(rc.getInsertionsKeySet(i)[1]), "" + iQuals[i]);
|
||||||
|
Assert.assertEquals(qsCov.keyFromBitSet(rc.getDeletionsKeySet(i)[1]), "" + dQuals[i]);
|
||||||
|
|
||||||
|
// check context
|
||||||
|
Assert.assertEquals(coCov.keyFromBitSet(rc.getMismatchesKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.MISMATCHES_CONTEXT_SIZE));
|
||||||
|
Assert.assertEquals(coCov.keyFromBitSet(rc.getInsertionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INSERTIONS_CONTEXT_SIZE));
|
||||||
|
Assert.assertEquals(coCov.keyFromBitSet(rc.getDeletionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.DELETIONS_CONTEXT_SIZE));
|
||||||
|
|
||||||
|
// check cycle
|
||||||
|
Assert.assertEquals(cyCov.keyFromBitSet(rc.getMismatchesKeySet(i)[3]), "" + (i+1));
|
||||||
|
Assert.assertEquals(cyCov.keyFromBitSet(rc.getInsertionsKeySet(i)[3]), "" + (i+1));
|
||||||
|
Assert.assertEquals(cyCov.keyFromBitSet(rc.getDeletionsKeySet(i)[3]), "" + (i+1));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,130 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.PrintStream;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author carneiro
|
||||||
|
* @since 4/21/12
|
||||||
|
*/
|
||||||
|
public class RecalibrationReportUnitTest {
|
||||||
|
@Test(enabled = true)
|
||||||
|
public void testOutput() {
|
||||||
|
final int length = 100;
|
||||||
|
|
||||||
|
List<Byte> quals = new ArrayList<Byte>(QualityUtils.MAX_QUAL_SCORE + 1);
|
||||||
|
List<Long> counts = new ArrayList<Long>(QualityUtils.MAX_QUAL_SCORE + 1);
|
||||||
|
|
||||||
|
for (int i = 0; i<= QualityUtils.MAX_QUAL_SCORE; i++) {
|
||||||
|
quals.add((byte) i);
|
||||||
|
counts.add(1L);
|
||||||
|
}
|
||||||
|
|
||||||
|
final QuantizationInfo quantizationInfo = new QuantizationInfo(quals, counts);
|
||||||
|
final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||||
|
final LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap = new LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>>();
|
||||||
|
|
||||||
|
quantizationInfo.noQuantization();
|
||||||
|
final List<Covariate> requiredCovariates = new LinkedList<Covariate>();
|
||||||
|
final List<Covariate> optionalCovariates = new LinkedList<Covariate>();
|
||||||
|
final List<Covariate> requestedCovariates = new LinkedList<Covariate>();
|
||||||
|
|
||||||
|
final ReadGroupCovariate rgCovariate = new ReadGroupCovariate();
|
||||||
|
rgCovariate.initialize(RAC);
|
||||||
|
requiredCovariates.add(rgCovariate);
|
||||||
|
final BQSRKeyManager rgKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
|
||||||
|
keysAndTablesMap.put(rgKeyManager, new HashMap<BitSet, RecalDatum>());
|
||||||
|
|
||||||
|
final QualityScoreCovariate qsCovariate = new QualityScoreCovariate();
|
||||||
|
qsCovariate.initialize(RAC);
|
||||||
|
requiredCovariates.add(qsCovariate);
|
||||||
|
final BQSRKeyManager qsKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
|
||||||
|
keysAndTablesMap.put(qsKeyManager, new HashMap<BitSet, RecalDatum>());
|
||||||
|
|
||||||
|
final ContextCovariate cxCovariate = new ContextCovariate();
|
||||||
|
cxCovariate.initialize(RAC);
|
||||||
|
optionalCovariates.add(cxCovariate);
|
||||||
|
final CycleCovariate cyCovariate = new CycleCovariate();
|
||||||
|
cyCovariate.initialize(RAC);
|
||||||
|
optionalCovariates.add(cyCovariate);
|
||||||
|
BQSRKeyManager cvKeyManager = new BQSRKeyManager(requiredCovariates, optionalCovariates);
|
||||||
|
keysAndTablesMap.put(cvKeyManager, new HashMap<BitSet, RecalDatum>());
|
||||||
|
|
||||||
|
for (Covariate cov : requiredCovariates)
|
||||||
|
requestedCovariates.add(cov);
|
||||||
|
for (Covariate cov : optionalCovariates)
|
||||||
|
requestedCovariates.add(cov);
|
||||||
|
|
||||||
|
final GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("id");
|
||||||
|
rg.setPlatform("illumina");
|
||||||
|
final GATKSAMRecord read = ReadUtils.createRandomRead(length, false);
|
||||||
|
read.setReadGroup(rg);
|
||||||
|
final byte [] readQuals = new byte[length];
|
||||||
|
for (int i = 0; i < length; i++)
|
||||||
|
readQuals[i] = 20;
|
||||||
|
read.setBaseQualities(readQuals);
|
||||||
|
|
||||||
|
|
||||||
|
final int expectedKeys = expectedNumberOfKeys(4, length, RAC.INSERTIONS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE);
|
||||||
|
int nKeys = 0; // keep track of how many keys were produced
|
||||||
|
final ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates);
|
||||||
|
for (int offset = 0; offset < length; offset++) {
|
||||||
|
for (Map.Entry<BQSRKeyManager, Map<BitSet, RecalDatum>> entry : keysAndTablesMap.entrySet()) {
|
||||||
|
BQSRKeyManager keyManager = entry.getKey();
|
||||||
|
Map<BitSet, RecalDatum> table = entry.getValue();
|
||||||
|
|
||||||
|
for (BitSet key : keyManager.bitSetsFromAllKeys(rc.getMismatchesKeySet(offset), EventType.BASE_SUBSTITUTION)) {
|
||||||
|
table.put(key, RecalDatum.createRandomRecalDatum(10000, 10));
|
||||||
|
nKeys++;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (BitSet key : keyManager.bitSetsFromAllKeys(rc.getInsertionsKeySet(offset), EventType.BASE_INSERTION)) {
|
||||||
|
table.put(key, RecalDatum.createRandomRecalDatum(100000, 10));
|
||||||
|
nKeys++;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
for (BitSet key : keyManager.bitSetsFromAllKeys(rc.getDeletionsKeySet(offset), EventType.BASE_DELETION)) {
|
||||||
|
table.put(key, RecalDatum.createRandomRecalDatum(100000, 10));
|
||||||
|
nKeys++;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Assert.assertEquals(nKeys, expectedKeys);
|
||||||
|
|
||||||
|
RecalibrationReport report = new RecalibrationReport(quantizationInfo, keysAndTablesMap, RAC.generateReportTable(), RAC);
|
||||||
|
|
||||||
|
File output = new File("RecalibrationReportUnitTestOutuput.grp");
|
||||||
|
PrintStream out;
|
||||||
|
try {
|
||||||
|
out = new PrintStream(output);
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
throw new ReviewedStingException("couldn't create the file " + output, e);
|
||||||
|
}
|
||||||
|
report.output(out);
|
||||||
|
|
||||||
|
RecalibrationReport loadedReport = new RecalibrationReport(output);
|
||||||
|
|
||||||
|
Assert.assertTrue(report.equals(loadedReport));
|
||||||
|
if (!output.delete())
|
||||||
|
throw new ReviewedStingException("File could not be deleted " + output);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int expectedNumberOfKeys (int nCovariates, int readLength, int indelContextSize, int mismatchesContextSize) {
|
||||||
|
int nommcs = readLength >= mismatchesContextSize ? mismatchesContextSize-1 : readLength;
|
||||||
|
int noincs = readLength >= indelContextSize ? 2*(indelContextSize-1) : 2*readLength;
|
||||||
|
return (nCovariates * readLength * 3) - nommcs - noincs;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue