Merge branch 'master' of ssh://copper.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Menachem Fromer 2012-04-17 20:33:45 -04:00
commit e3bc1e8630
16 changed files with 150 additions and 205 deletions

View File

@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@ -91,7 +90,7 @@ import java.util.TreeSet;
*/
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
@Requires({DataSource.READS, DataSource.REFERENCE})
public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
public class PrintReadsWalker extends ReadWalker<GATKSAMRecord, SAMFileWriter> {
@Output(doc="Write output to this BAM filename instead of STDOUT")
SAMFileWriter out;
@ -129,6 +128,13 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
@Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false)
public Set<String> sampleNames = new TreeSet<String>();
/**
* Erase all extra attributes in the read but keep the read group information
*/
@Argument(fullName="simplify", shortName="s", doc="Simplify all reads.", required=false)
public boolean simplifyReads = false;
private TreeSet<String> samplesToChoose = new TreeSet<String>();
private boolean SAMPLES_SPECIFIED = false;
@ -162,7 +168,7 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
* The reads filter function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord
* @param read the read itself, as a GATKSAMRecord
* @return true if the read passes the filter, false if it doesn't
*/
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
@ -208,11 +214,11 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
* The reads map function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord
* @param read the read itself, as a GATKSAMRecord
* @return the read itself
*/
public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
return read;
public GATKSAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
return simplifyReads ? read.simplify() : read;
}
/**
@ -232,7 +238,7 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
* @param output the output source
* @return the SAMFileWriter, so that the next reduce can emit to the same source
*/
public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) {
public SAMFileWriter reduce( GATKSAMRecord read, SAMFileWriter output ) {
output.addAlignment(read);
return output;
}

View File

@ -0,0 +1,52 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.MathUtils;
import java.util.LinkedList;
import java.util.List;
/**
* Short one line description of the walker.
*
* <p> [Long description of the walker] </p>
*
*
* <h2>Input</h2> <p> [Description of the Input] </p>
*
* <h2>Output</h2> <p> [Description of the Output] </p>
*
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T [walker name]
* </pre>
*
* @author Mauricio Carneiro
* @since 4/17/12
*/
public class AccuracyDatum extends RecalDatum {
private final List<Double> accuracy = new LinkedList<Double>();
private final List<Byte> reportedQualities = new LinkedList<Byte>();
public AccuracyDatum(final RecalDatum recalDatum, final byte originalQuality) {
super(recalDatum);
accuracy.add(calculateAccuracy(recalDatum, originalQuality));
reportedQualities.add(originalQuality);
}
public void combine(final RecalDatum recalDatum, final byte originalQuality) {
this.combine(recalDatum);
accuracy.add(calculateAccuracy(recalDatum, originalQuality));
reportedQualities.add(originalQuality);
}
@Override
public String toString() {
return String.format("%s,%.2f,%.2f", super.toString(), MathUtils.average(reportedQualities), MathUtils.average(accuracy));
}
private static double calculateAccuracy(final RecalDatum recalDatum, final byte originalQuality) {
return recalDatum.getEmpiricalQuality() - originalQuality;
}
}

View File

@ -52,7 +52,7 @@ public class BQSRKeyManager {
for (Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management
int nBits = required.numberOfBits(); // number of bits used by this covariate
BitSet mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
this.requiredCovariates.add(new RequiredCovariateInfo(nRequiredBits, nBits, mask, required)); // Create an object for this required covariate
this.requiredCovariates.add(new RequiredCovariateInfo(nRequiredBits, mask, required)); // Create an object for this required covariate
nRequiredBits += nBits;
}
@ -184,7 +184,7 @@ public class BQSRKeyManager {
* @return an object array with the values for each key
*/
public List<Object> keySetFrom(BitSet key) {
List<Object> objectKeys = new ArrayList<Object>();
List<Object> objectKeys = new LinkedList<Object>();
for (RequiredCovariateInfo info : requiredCovariates) {
BitSet covariateBitSet = extractBitSetFromKey(key, info.mask, info.bitsBefore); // get the covariate's bitset
objectKeys.add(info.covariate.keyFromBitSet(covariateBitSet)); // convert the bitset to object using covariate's interface
@ -286,7 +286,7 @@ public class BQSRKeyManager {
public final BitSet mask; // the mask to pull out this covariate from the combined bitset key ( a mask made from bitsBefore and nBits )
public final Covariate covariate; // this allows reverse lookup of the Covariates in order
RequiredCovariateInfo(int bitsBefore, int nBits, BitSet mask, Covariate covariate) {
RequiredCovariateInfo(int bitsBefore, BitSet mask, Covariate covariate) {
this.bitsBefore = bitsBefore;
this.mask = mask;
this.covariate = covariate;

View File

@ -47,9 +47,6 @@ public class ContextCovariate implements StandardCovariate {
private int insertionsContextSize;
private int deletionsContextSize;
private final BitSet NO_CONTEXT_BITSET = BitSetUtils.bitSetFrom(-1L);
// protected final String NO_CONTEXT_VALUE = "N"; // protected so we can UNIT TEST it
private byte LOW_QUAL_TAIL;
// Initialize any member variables using the command-line arguments passed to the walkers

View File

@ -14,9 +14,9 @@ import java.util.BitSet;
* @since 2/8/12
*/
public class CovariateValues {
private BitSet[] mismatches;
private BitSet[] insertions;
private BitSet[] deletions;
private final BitSet[] mismatches;
private final BitSet[] insertions;
private final BitSet[] deletions;
public CovariateValues(BitSet[] mismatch, BitSet[] insertion, BitSet[] deletion) {
this.mismatches = mismatch;

View File

@ -2,8 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.List;
/*
* Copyright (c) 2010 The Broad Institute
*
@ -38,10 +36,13 @@ import java.util.List;
* Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
*/
public class RecalDatumOptimized {
public class Datum {
long numObservations; // number of bases seen in total
long numMismatches; // number of bases seen that didn't match the reference
private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
protected long numObservations; // number of bases seen in total
protected long numMismatches; // number of bases seen that didn't match the reference
//---------------------------------------------------------------------------------------------------------------
//
@ -49,67 +50,43 @@ public class RecalDatumOptimized {
//
//---------------------------------------------------------------------------------------------------------------
public RecalDatumOptimized() {
public Datum() {
numObservations = 0L;
numMismatches = 0L;
}
public RecalDatumOptimized(final long _numObservations, final long _numMismatches) {
numObservations = _numObservations;
numMismatches = _numMismatches;
}
public RecalDatumOptimized(final RecalDatumOptimized copy) {
this.numObservations = copy.numObservations;
this.numMismatches = copy.numMismatches;
}
//---------------------------------------------------------------------------------------------------------------
//
// increment methods
//
//---------------------------------------------------------------------------------------------------------------
public synchronized final void increment(final long incObservations, final long incMismatches) {
synchronized void increment(final long incObservations, final long incMismatches) {
numObservations += incObservations;
numMismatches += incMismatches;
}
public synchronized final void increment(final RecalDatumOptimized other) {
increment(other.numObservations, other.numMismatches);
}
public synchronized final void increment(final List<RecalDatumOptimized> data) {
for (RecalDatumOptimized other : data) {
this.increment(other);
}
}
//---------------------------------------------------------------------------------------------------------------
//
// methods to derive empirical quality score
//
//---------------------------------------------------------------------------------------------------------------
public final double empiricalQualDouble(final int smoothing, final double maxQual) {
final double doubleMismatches = (double) (numMismatches + smoothing);
final double doubleObservations = (double) (numObservations + smoothing);
double empiricalQualDouble() {
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT);
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
return Math.min(empiricalQual, maxQual);
return Math.min(empiricalQual, (double) QualityUtils.MAX_QUAL_SCORE);
}
public final byte empiricalQualByte(final int smoothing) {
final double doubleMismatches = (double) (numMismatches + smoothing);
final double doubleObservations = (double) (numObservations + smoothing);
return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40
}
public final byte empiricalQualByte() {
return empiricalQualByte(0); // 'default' behavior is to use smoothing value of zero
byte empiricalQualByte() {
final double doubleMismatches = (double) (numMismatches);
final double doubleObservations = (double) (numObservations);
return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40
}
@Override
public final String toString() {
public String toString() {
return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
}

View File

@ -7,8 +7,8 @@ public enum EventType {
BASE_INSERTION(1, "I"),
BASE_DELETION(2, "D");
public int index;
public String representation;
public final int index;
private final String representation;
private EventType(int index, String representation) {
this.index = index;

View File

@ -19,9 +19,9 @@ import java.util.Map;
public class QuantizationInfo {
private List<Byte> quantizedQuals;
private List<Long> empiricalQualCounts;
int quantizationLevels;
private int quantizationLevels;
public QuantizationInfo(List<Byte> quantizedQuals, List<Long> empiricalQualCounts, int quantizationLevels) {
private QuantizationInfo(List<Byte> quantizedQuals, List<Long> empiricalQualCounts, int quantizationLevels) {
this.quantizedQuals = quantizedQuals;
this.empiricalQualCounts = empiricalQualCounts;
this.quantizationLevels = quantizationLevels;

View File

@ -13,9 +13,9 @@ import java.util.BitSet;
* @since 2/8/12
*/
public class ReadCovariates {
private BitSet[][] mismatchesKeySet;
private BitSet[][] insertionsKeySet;
private BitSet[][] deletionsKeySet;
private final BitSet[][] mismatchesKeySet;
private final BitSet[][] insertionsKeySet;
private final BitSet[][] deletionsKeySet;
private int nextCovariateIndex;

View File

@ -38,7 +38,6 @@ import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -221,7 +220,7 @@ public class RecalDataManager {
logger.info("");
}
public static List<GATKReportTable> generateReportTables(Map<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap) {
private static List<GATKReportTable> generateReportTables(Map<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap) {
List<GATKReportTable> result = new LinkedList<GATKReportTable>();
int tableIndex = 0;
@ -349,7 +348,7 @@ public class RecalDataManager {
* @param read The SAMRecord to parse
* @return whether or not this read should be skipped
*/
public static boolean checkColorSpace(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
public static boolean isColorSpaceConsistent(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
if (ReadUtils.isSOLiDRead(read)) { // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
@ -378,99 +377,10 @@ public class RecalDataManager {
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
else
return false; // otherwise, just skip the read
return true; // otherwise, just skip the read
}
}
return true;
}
/**
* Parse through the color space of the read and apply the desired --solid_recal_mode correction to the bases
* This method doesn't add the inconsistent tag to the read like parseColorSpace does
*
* @param read The SAMRecord to parse
* @param originalQualScores The array of original quality scores to modify during the correction
* @param solidRecalMode Which mode of solid recalibration to apply
* @param refBases The reference for this read
* @return A new array of quality scores that have been ref bias corrected
*/
public static byte[] calcColorSpace(final GATKSAMRecord read, byte[] originalQualScores, final SOLID_RECAL_MODE solidRecalMode, final byte[] refBases) {
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
if (attr != null) {
byte[] colorSpace;
if (attr instanceof String) {
colorSpace = ((String) attr).getBytes();
}
else {
throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
}
// Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
byte[] readBases = read.getReadBases();
final byte[] colorImpliedBases = readBases.clone();
byte[] refBasesDirRead = AlignmentUtils.alignmentToByteArray(read.getCigar(), read.getReadBases(), refBases); //BUGBUG: This needs to change when read walkers are changed to give the aligned refBases
if (read.getReadNegativeStrandFlag()) {
readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
refBasesDirRead = BaseUtils.simpleReverseComplement(refBasesDirRead.clone());
}
final int[] inconsistency = new int[readBases.length];
byte prevBase = colorSpace[0]; // The sentinel
for (int iii = 0; iii < readBases.length; iii++) {
final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[iii + 1]);
colorImpliedBases[iii] = thisBase;
inconsistency[iii] = (thisBase == readBases[iii] ? 0 : 1);
prevBase = readBases[iii];
}
// Now that we have the inconsistency array apply the desired correction to the inconsistent bases
if (solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO) { // Set inconsistent bases and the one before it to Q0
final boolean setBaseN = false;
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
}
else if (solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO_BASE_N) {
final boolean setBaseN = true;
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
}
else if (solidRecalMode == SOLID_RECAL_MODE.REMOVE_REF_BIAS) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases
solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead);
}
}
else {
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
}
return originalQualScores;
}
public static boolean checkNoCallColorSpace(final GATKSAMRecord read) {
if (ReadUtils.isSOLiDRead(read)) {
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
if (attr != null) {
byte[] colorSpace;
if (attr instanceof String) {
colorSpace = ((String) attr).substring(1).getBytes(); // trim off the Sentinel
}
else {
throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
}
for (byte color : colorSpace) {
if (color != (byte) '0' && color != (byte) '1' && color != (byte) '2' && color != (byte) '3') {
return true; // There is a bad color in this SOLiD read and the user wants to skip over it
}
}
}
else {
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
}
}
return false; // There aren't any color no calls in this SOLiD read
return false;
}
/**
@ -625,16 +535,16 @@ public class RecalDataManager {
* @param offset The offset in the read at which to check
* @return Returns true if the base was inconsistent with the color space
*/
public static boolean isInconsistentColorSpace(final GATKSAMRecord read, final int offset) {
public static boolean isColorSpaceConsistent(final GATKSAMRecord read, final int offset) {
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG);
if (attr != null) {
final byte[] inconsistency = (byte[]) attr;
// NOTE: The inconsistency array is in the direction of the read, not aligned to the reference!
if (read.getReadNegativeStrandFlag()) { // Negative direction
return inconsistency[inconsistency.length - offset - 1] != (byte) 0;
return inconsistency[inconsistency.length - offset - 1] == (byte) 0;
}
else { // Forward direction
return inconsistency[offset] != (byte) 0;
return inconsistency[offset] == (byte) 0;
}
// This block of code is for if you want to check both the offset and the next base for color space inconsistency
@ -654,7 +564,7 @@ public class RecalDataManager {
}
else { // No inconsistency array, so nothing is inconsistent
return false;
return true;
}
}

View File

@ -33,12 +33,11 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
* An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
*/
public class RecalDatum extends RecalDatumOptimized {
public class RecalDatum extends Datum {
private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations
private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
//---------------------------------------------------------------------------------------------------------------
//
@ -50,7 +49,7 @@ public class RecalDatum extends RecalDatumOptimized {
numObservations = 0L;
numMismatches = 0L;
estimatedQReported = 0.0;
empiricalQuality = 0.0;
empiricalQuality = -1.0;
}
public RecalDatum(final long _numObservations, final long _numMismatches, final double _estimatedQReported, final double _empiricalQuality) {
@ -67,60 +66,52 @@ public class RecalDatum extends RecalDatumOptimized {
this.empiricalQuality = copy.empiricalQuality;
}
//---------------------------------------------------------------------------------------------------------------
//
// increment methods
//
//---------------------------------------------------------------------------------------------------------------
public final void combine(final RecalDatum other) {
public void combine(final RecalDatum other) {
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
this.increment(other.numObservations, other.numMismatches);
this.estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations);
this.empiricalQuality = -1.0; // reset the empirical quality calculation so we never have a wrongly calculated empirical quality stored
}
//---------------------------------------------------------------------------------------------------------------
//
// methods to derive empirical quality score
//
//---------------------------------------------------------------------------------------------------------------
public final void calcCombinedEmpiricalQuality(final int maxQual) {
this.empiricalQuality = empiricalQualDouble(SMOOTHING_CONSTANT, maxQual); // cache the value so we don't call log over and over again
public final void calcCombinedEmpiricalQuality() {
this.empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
}
public final void calcEstimatedReportedQuality() {
this.estimatedQReported = -10 * Math.log10(calcExpectedErrors() / numObservations);
}
//---------------------------------------------------------------------------------------------------------------
//
// misc. methods
//
//---------------------------------------------------------------------------------------------------------------
public final double getEstimatedQReported() {
return estimatedQReported;
}
public final double getEmpiricalQuality() {
if (empiricalQuality < 0)
calcCombinedEmpiricalQuality();
return empiricalQuality;
}
private double calcExpectedErrors() {
return (double) this.numObservations * qualToErrorProb(estimatedQReported);
}
private double qualToErrorProb(final double qual) {
return Math.pow(10.0, qual / -10.0);
}
/**
* Makes a hard copy of the recal datum element
*
* @return a new recal datum object with the same contents of this datum.
*/
protected RecalDatum copy() {
public RecalDatum copy() {
return new RecalDatum(numObservations, numMismatches, estimatedQReported, empiricalQuality);
}
@Override
public String toString() {
return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()));
}
private double calcExpectedErrors() {
return (double) this.numObservations * qualToErrorProb(estimatedQReported);
}
private double qualToErrorProb(final double qual) {
return Math.pow(10.0, qual / -10.0);
}
}

View File

@ -30,7 +30,7 @@ import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.Utils;
import java.io.PrintStream;
import java.io.File;
import java.util.Collections;
import java.util.List;
@ -62,7 +62,7 @@ public class RecalibrationArgumentCollection {
*/
@Gather(BQSRGatherer.class)
@Output
public PrintStream RECAL_FILE;
public File RECAL_FILE;
/**
* List all implemented covariates.

View File

@ -18,13 +18,11 @@ import java.util.*;
*/
public class RecalibrationReport {
private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done)
private LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
private ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation
private final LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation
GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter | todo -- this should be a new parameter, not necessarily coming from the original table parameter list
private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check.";
private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter
public RecalibrationReport(final File RECAL_FILE) {
GATKReport report = new GATKReport(RECAL_FILE);
@ -53,6 +51,7 @@ public class RecalibrationReport {
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES)
final String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check.";
if (nRequiredCovariates == 1) { // if there is only one required covariate, this is the read group table
final GATKReportTable reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE);
table = parseReadGroupTable(keyManager, reportTable);
@ -292,7 +291,7 @@ public class RecalibrationReport {
public void calculateEmpiricalAndQuantizedQualities() {
for (Map<BitSet, RecalDatum> table : keysAndTablesMap.values())
for (RecalDatum datum : table.values())
datum.calcCombinedEmpiricalQuality(QualityUtils.MAX_QUAL_SCORE);
datum.calcCombinedEmpiricalQuality();
quantizationInfo = new QuantizationInfo(keysAndTablesMap, RAC.QUANTIZING_LEVELS);
}

View File

@ -51,6 +51,7 @@ public class BaseRecalibration {
* Constructor using a GATK Report file
*
* @param RECAL_FILE a GATK Report file containing the recalibration information
* @param quantizationLevels number of bins to quantize the quality scores
*/
public BaseRecalibration(final File RECAL_FILE, int quantizationLevels) {
RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE);
@ -80,7 +81,7 @@ public class BaseRecalibration {
for (int offset = 0; offset < read.getReadLength(); offset++) { // recalibrate all bases in the read
byte qualityScore = originalQuals[offset];
if (qualityScore > QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
if (qualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
final BitSet[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model
qualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
}

View File

@ -335,10 +335,11 @@ public class GATKSAMRecord extends BAMRecord {
/**
* Clears all attributes except ReadGroup of the read.
*/
public void simplify () {
public GATKSAMRecord simplify () {
GATKSAMReadGroupRecord rg = getReadGroup();
this.clearAttributes();
setReadGroup(rg);
return this;
}
/**

View File

@ -53,6 +53,17 @@ public class BQSRKeyManagerUnitTest {
createReadAndTest(covariates, nRequired);
}
@Test(enabled = true)
public void testOneCovariateWithOptionalCovariates() {
final int nRequired = 1;
final ArrayList<Covariate> covariates = new ArrayList<Covariate>(4);
covariates.add(new ReadGroupCovariate());
covariates.add(new QualityScoreCovariate());
covariates.add(new CycleCovariate());
covariates.add(new ContextCovariate());
createReadAndTest(covariates, nRequired);
}
private void createReadAndTest(List<Covariate> covariates, int nRequired) {
int readLength = 1000;
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(ReadUtils.createRandomReadBases(readLength, true), ReadUtils.createRandomReadQuals(readLength), readLength + "M");