Merge branch 'master' of ssh://copper.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Menachem Fromer 2012-04-17 20:33:45 -04:00
commit e3bc1e8630
16 changed files with 150 additions and 205 deletions

View File

@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@ -91,7 +90,7 @@ import java.util.TreeSet;
*/ */
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT) @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
@Requires({DataSource.READS, DataSource.REFERENCE}) @Requires({DataSource.READS, DataSource.REFERENCE})
public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> { public class PrintReadsWalker extends ReadWalker<GATKSAMRecord, SAMFileWriter> {
@Output(doc="Write output to this BAM filename instead of STDOUT") @Output(doc="Write output to this BAM filename instead of STDOUT")
SAMFileWriter out; SAMFileWriter out;
@ -129,6 +128,13 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
@Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false) @Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false)
public Set<String> sampleNames = new TreeSet<String>(); public Set<String> sampleNames = new TreeSet<String>();
/**
* Erase all extra attributes in the read but keep the read group information
*/
@Argument(fullName="simplify", shortName="s", doc="Simplify all reads.", required=false)
public boolean simplifyReads = false;
private TreeSet<String> samplesToChoose = new TreeSet<String>(); private TreeSet<String> samplesToChoose = new TreeSet<String>();
private boolean SAMPLES_SPECIFIED = false; private boolean SAMPLES_SPECIFIED = false;
@ -162,7 +168,7 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
* The reads filter function. * The reads filter function.
* *
* @param ref the reference bases that correspond to our read, if a reference was provided * @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord * @param read the read itself, as a GATKSAMRecord
* @return true if the read passes the filter, false if it doesn't * @return true if the read passes the filter, false if it doesn't
*/ */
public boolean filter(ReferenceContext ref, GATKSAMRecord read) { public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
@ -208,11 +214,11 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
* The reads map function. * The reads map function.
* *
* @param ref the reference bases that correspond to our read, if a reference was provided * @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord * @param read the read itself, as a GATKSAMRecord
* @return the read itself * @return the read itself
*/ */
public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) { public GATKSAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
return read; return simplifyReads ? read.simplify() : read;
} }
/** /**
@ -232,7 +238,7 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
* @param output the output source * @param output the output source
* @return the SAMFileWriter, so that the next reduce can emit to the same source * @return the SAMFileWriter, so that the next reduce can emit to the same source
*/ */
public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) { public SAMFileWriter reduce( GATKSAMRecord read, SAMFileWriter output ) {
output.addAlignment(read); output.addAlignment(read);
return output; return output;
} }

View File

@ -0,0 +1,52 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.MathUtils;
import java.util.LinkedList;
import java.util.List;
/**
* Short one line description of the walker.
*
* <p> [Long description of the walker] </p>
*
*
* <h2>Input</h2> <p> [Description of the Input] </p>
*
* <h2>Output</h2> <p> [Description of the Output] </p>
*
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T [walker name]
* </pre>
*
* @author Mauricio Carneiro
* @since 4/17/12
*/
public class AccuracyDatum extends RecalDatum {
private final List<Double> accuracy = new LinkedList<Double>();
private final List<Byte> reportedQualities = new LinkedList<Byte>();
public AccuracyDatum(final RecalDatum recalDatum, final byte originalQuality) {
super(recalDatum);
accuracy.add(calculateAccuracy(recalDatum, originalQuality));
reportedQualities.add(originalQuality);
}
public void combine(final RecalDatum recalDatum, final byte originalQuality) {
this.combine(recalDatum);
accuracy.add(calculateAccuracy(recalDatum, originalQuality));
reportedQualities.add(originalQuality);
}
@Override
public String toString() {
return String.format("%s,%.2f,%.2f", super.toString(), MathUtils.average(reportedQualities), MathUtils.average(accuracy));
}
private static double calculateAccuracy(final RecalDatum recalDatum, final byte originalQuality) {
return recalDatum.getEmpiricalQuality() - originalQuality;
}
}

View File

@ -52,7 +52,7 @@ public class BQSRKeyManager {
for (Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management for (Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management
int nBits = required.numberOfBits(); // number of bits used by this covariate int nBits = required.numberOfBits(); // number of bits used by this covariate
BitSet mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate BitSet mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
this.requiredCovariates.add(new RequiredCovariateInfo(nRequiredBits, nBits, mask, required)); // Create an object for this required covariate this.requiredCovariates.add(new RequiredCovariateInfo(nRequiredBits, mask, required)); // Create an object for this required covariate
nRequiredBits += nBits; nRequiredBits += nBits;
} }
@ -184,7 +184,7 @@ public class BQSRKeyManager {
* @return an object array with the values for each key * @return an object array with the values for each key
*/ */
public List<Object> keySetFrom(BitSet key) { public List<Object> keySetFrom(BitSet key) {
List<Object> objectKeys = new ArrayList<Object>(); List<Object> objectKeys = new LinkedList<Object>();
for (RequiredCovariateInfo info : requiredCovariates) { for (RequiredCovariateInfo info : requiredCovariates) {
BitSet covariateBitSet = extractBitSetFromKey(key, info.mask, info.bitsBefore); // get the covariate's bitset BitSet covariateBitSet = extractBitSetFromKey(key, info.mask, info.bitsBefore); // get the covariate's bitset
objectKeys.add(info.covariate.keyFromBitSet(covariateBitSet)); // convert the bitset to object using covariate's interface objectKeys.add(info.covariate.keyFromBitSet(covariateBitSet)); // convert the bitset to object using covariate's interface
@ -286,7 +286,7 @@ public class BQSRKeyManager {
public final BitSet mask; // the mask to pull out this covariate from the combined bitset key ( a mask made from bitsBefore and nBits ) public final BitSet mask; // the mask to pull out this covariate from the combined bitset key ( a mask made from bitsBefore and nBits )
public final Covariate covariate; // this allows reverse lookup of the Covariates in order public final Covariate covariate; // this allows reverse lookup of the Covariates in order
RequiredCovariateInfo(int bitsBefore, int nBits, BitSet mask, Covariate covariate) { RequiredCovariateInfo(int bitsBefore, BitSet mask, Covariate covariate) {
this.bitsBefore = bitsBefore; this.bitsBefore = bitsBefore;
this.mask = mask; this.mask = mask;
this.covariate = covariate; this.covariate = covariate;

View File

@ -47,9 +47,6 @@ public class ContextCovariate implements StandardCovariate {
private int insertionsContextSize; private int insertionsContextSize;
private int deletionsContextSize; private int deletionsContextSize;
private final BitSet NO_CONTEXT_BITSET = BitSetUtils.bitSetFrom(-1L);
// protected final String NO_CONTEXT_VALUE = "N"; // protected so we can UNIT TEST it
private byte LOW_QUAL_TAIL; private byte LOW_QUAL_TAIL;
// Initialize any member variables using the command-line arguments passed to the walkers // Initialize any member variables using the command-line arguments passed to the walkers

View File

@ -14,9 +14,9 @@ import java.util.BitSet;
* @since 2/8/12 * @since 2/8/12
*/ */
public class CovariateValues { public class CovariateValues {
private BitSet[] mismatches; private final BitSet[] mismatches;
private BitSet[] insertions; private final BitSet[] insertions;
private BitSet[] deletions; private final BitSet[] deletions;
public CovariateValues(BitSet[] mismatch, BitSet[] insertion, BitSet[] deletion) { public CovariateValues(BitSet[] mismatch, BitSet[] insertion, BitSet[] deletion) {
this.mismatches = mismatch; this.mismatches = mismatch;

View File

@ -2,8 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.QualityUtils;
import java.util.List;
/* /*
* Copyright (c) 2010 The Broad Institute * Copyright (c) 2010 The Broad Institute
* *
@ -38,10 +36,13 @@ import java.util.List;
* Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates. * Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
*/ */
public class RecalDatumOptimized { public class Datum {
long numObservations; // number of bases seen in total
long numMismatches; // number of bases seen that didn't match the reference
private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
protected long numObservations; // number of bases seen in total
protected long numMismatches; // number of bases seen that didn't match the reference
//--------------------------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------------------------------
// //
@ -49,67 +50,43 @@ public class RecalDatumOptimized {
// //
//--------------------------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------------------------------
public RecalDatumOptimized() { public Datum() {
numObservations = 0L; numObservations = 0L;
numMismatches = 0L; numMismatches = 0L;
} }
public RecalDatumOptimized(final long _numObservations, final long _numMismatches) {
numObservations = _numObservations;
numMismatches = _numMismatches;
}
public RecalDatumOptimized(final RecalDatumOptimized copy) {
this.numObservations = copy.numObservations;
this.numMismatches = copy.numMismatches;
}
//--------------------------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------------------------------
// //
// increment methods // increment methods
// //
//--------------------------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------------------------------
public synchronized final void increment(final long incObservations, final long incMismatches) { synchronized void increment(final long incObservations, final long incMismatches) {
numObservations += incObservations; numObservations += incObservations;
numMismatches += incMismatches; numMismatches += incMismatches;
} }
public synchronized final void increment(final RecalDatumOptimized other) {
increment(other.numObservations, other.numMismatches);
}
public synchronized final void increment(final List<RecalDatumOptimized> data) {
for (RecalDatumOptimized other : data) {
this.increment(other);
}
}
//--------------------------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------------------------------
// //
// methods to derive empirical quality score // methods to derive empirical quality score
// //
//--------------------------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------------------------------
public final double empiricalQualDouble(final int smoothing, final double maxQual) { double empiricalQualDouble() {
final double doubleMismatches = (double) (numMismatches + smoothing); final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
final double doubleObservations = (double) (numObservations + smoothing); final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT);
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations); double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
return Math.min(empiricalQual, maxQual); return Math.min(empiricalQual, (double) QualityUtils.MAX_QUAL_SCORE);
} }
public final byte empiricalQualByte(final int smoothing) { byte empiricalQualByte() {
final double doubleMismatches = (double) (numMismatches + smoothing); final double doubleMismatches = (double) (numMismatches);
final double doubleObservations = (double) (numObservations + smoothing); final double doubleObservations = (double) (numObservations);
return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40 return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40
}
public final byte empiricalQualByte() {
return empiricalQualByte(0); // 'default' behavior is to use smoothing value of zero
} }
@Override @Override
public final String toString() { public String toString() {
return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte()); return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
} }

View File

@ -7,8 +7,8 @@ public enum EventType {
BASE_INSERTION(1, "I"), BASE_INSERTION(1, "I"),
BASE_DELETION(2, "D"); BASE_DELETION(2, "D");
public int index; public final int index;
public String representation; private final String representation;
private EventType(int index, String representation) { private EventType(int index, String representation) {
this.index = index; this.index = index;

View File

@ -19,9 +19,9 @@ import java.util.Map;
public class QuantizationInfo { public class QuantizationInfo {
private List<Byte> quantizedQuals; private List<Byte> quantizedQuals;
private List<Long> empiricalQualCounts; private List<Long> empiricalQualCounts;
int quantizationLevels; private int quantizationLevels;
public QuantizationInfo(List<Byte> quantizedQuals, List<Long> empiricalQualCounts, int quantizationLevels) { private QuantizationInfo(List<Byte> quantizedQuals, List<Long> empiricalQualCounts, int quantizationLevels) {
this.quantizedQuals = quantizedQuals; this.quantizedQuals = quantizedQuals;
this.empiricalQualCounts = empiricalQualCounts; this.empiricalQualCounts = empiricalQualCounts;
this.quantizationLevels = quantizationLevels; this.quantizationLevels = quantizationLevels;

View File

@ -13,9 +13,9 @@ import java.util.BitSet;
* @since 2/8/12 * @since 2/8/12
*/ */
public class ReadCovariates { public class ReadCovariates {
private BitSet[][] mismatchesKeySet; private final BitSet[][] mismatchesKeySet;
private BitSet[][] insertionsKeySet; private final BitSet[][] insertionsKeySet;
private BitSet[][] deletionsKeySet; private final BitSet[][] deletionsKeySet;
private int nextCovariateIndex; private int nextCovariateIndex;

View File

@ -38,7 +38,6 @@ import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -221,7 +220,7 @@ public class RecalDataManager {
logger.info(""); logger.info("");
} }
public static List<GATKReportTable> generateReportTables(Map<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap) { private static List<GATKReportTable> generateReportTables(Map<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap) {
List<GATKReportTable> result = new LinkedList<GATKReportTable>(); List<GATKReportTable> result = new LinkedList<GATKReportTable>();
int tableIndex = 0; int tableIndex = 0;
@ -349,7 +348,7 @@ public class RecalDataManager {
* @param read The SAMRecord to parse * @param read The SAMRecord to parse
* @return whether or not this read should be skipped * @return whether or not this read should be skipped
*/ */
public static boolean checkColorSpace(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) { public static boolean isColorSpaceConsistent(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
if (ReadUtils.isSOLiDRead(read)) { // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base if (ReadUtils.isSOLiDRead(read)) { // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG); final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
@ -378,99 +377,10 @@ public class RecalDataManager {
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias."); throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
else else
return false; // otherwise, just skip the read return true; // otherwise, just skip the read
} }
} }
return true; return false;
}
/**
* Parse through the color space of the read and apply the desired --solid_recal_mode correction to the bases
* This method doesn't add the inconsistent tag to the read like parseColorSpace does
*
* @param read The SAMRecord to parse
* @param originalQualScores The array of original quality scores to modify during the correction
* @param solidRecalMode Which mode of solid recalibration to apply
* @param refBases The reference for this read
* @return A new array of quality scores that have been ref bias corrected
*/
public static byte[] calcColorSpace(final GATKSAMRecord read, byte[] originalQualScores, final SOLID_RECAL_MODE solidRecalMode, final byte[] refBases) {
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
if (attr != null) {
byte[] colorSpace;
if (attr instanceof String) {
colorSpace = ((String) attr).getBytes();
}
else {
throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
}
// Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
byte[] readBases = read.getReadBases();
final byte[] colorImpliedBases = readBases.clone();
byte[] refBasesDirRead = AlignmentUtils.alignmentToByteArray(read.getCigar(), read.getReadBases(), refBases); //BUGBUG: This needs to change when read walkers are changed to give the aligned refBases
if (read.getReadNegativeStrandFlag()) {
readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
refBasesDirRead = BaseUtils.simpleReverseComplement(refBasesDirRead.clone());
}
final int[] inconsistency = new int[readBases.length];
byte prevBase = colorSpace[0]; // The sentinel
for (int iii = 0; iii < readBases.length; iii++) {
final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[iii + 1]);
colorImpliedBases[iii] = thisBase;
inconsistency[iii] = (thisBase == readBases[iii] ? 0 : 1);
prevBase = readBases[iii];
}
// Now that we have the inconsistency array apply the desired correction to the inconsistent bases
if (solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO) { // Set inconsistent bases and the one before it to Q0
final boolean setBaseN = false;
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
}
else if (solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO_BASE_N) {
final boolean setBaseN = true;
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
}
else if (solidRecalMode == SOLID_RECAL_MODE.REMOVE_REF_BIAS) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases
solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead);
}
}
else {
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
}
return originalQualScores;
}
public static boolean checkNoCallColorSpace(final GATKSAMRecord read) {
if (ReadUtils.isSOLiDRead(read)) {
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
if (attr != null) {
byte[] colorSpace;
if (attr instanceof String) {
colorSpace = ((String) attr).substring(1).getBytes(); // trim off the Sentinel
}
else {
throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
}
for (byte color : colorSpace) {
if (color != (byte) '0' && color != (byte) '1' && color != (byte) '2' && color != (byte) '3') {
return true; // There is a bad color in this SOLiD read and the user wants to skip over it
}
}
}
else {
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
}
}
return false; // There aren't any color no calls in this SOLiD read
} }
/** /**
@ -625,16 +535,16 @@ public class RecalDataManager {
* @param offset The offset in the read at which to check * @param offset The offset in the read at which to check
* @return Returns true if the base was inconsistent with the color space * @return Returns true if the base was inconsistent with the color space
*/ */
public static boolean isInconsistentColorSpace(final GATKSAMRecord read, final int offset) { public static boolean isColorSpaceConsistent(final GATKSAMRecord read, final int offset) {
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG); final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG);
if (attr != null) { if (attr != null) {
final byte[] inconsistency = (byte[]) attr; final byte[] inconsistency = (byte[]) attr;
// NOTE: The inconsistency array is in the direction of the read, not aligned to the reference! // NOTE: The inconsistency array is in the direction of the read, not aligned to the reference!
if (read.getReadNegativeStrandFlag()) { // Negative direction if (read.getReadNegativeStrandFlag()) { // Negative direction
return inconsistency[inconsistency.length - offset - 1] != (byte) 0; return inconsistency[inconsistency.length - offset - 1] == (byte) 0;
} }
else { // Forward direction else { // Forward direction
return inconsistency[offset] != (byte) 0; return inconsistency[offset] == (byte) 0;
} }
// This block of code is for if you want to check both the offset and the next base for color space inconsistency // This block of code is for if you want to check both the offset and the next base for color space inconsistency
@ -654,7 +564,7 @@ public class RecalDataManager {
} }
else { // No inconsistency array, so nothing is inconsistent else { // No inconsistency array, so nothing is inconsistent
return false; return true;
} }
} }

View File

@ -33,12 +33,11 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
* An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates. * An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
*/ */
public class RecalDatum extends RecalDatumOptimized { public class RecalDatum extends Datum {
private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations
private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example) private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
//--------------------------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------------------------------
// //
@ -50,7 +49,7 @@ public class RecalDatum extends RecalDatumOptimized {
numObservations = 0L; numObservations = 0L;
numMismatches = 0L; numMismatches = 0L;
estimatedQReported = 0.0; estimatedQReported = 0.0;
empiricalQuality = 0.0; empiricalQuality = -1.0;
} }
public RecalDatum(final long _numObservations, final long _numMismatches, final double _estimatedQReported, final double _empiricalQuality) { public RecalDatum(final long _numObservations, final long _numMismatches, final double _estimatedQReported, final double _empiricalQuality) {
@ -67,60 +66,52 @@ public class RecalDatum extends RecalDatumOptimized {
this.empiricalQuality = copy.empiricalQuality; this.empiricalQuality = copy.empiricalQuality;
} }
//--------------------------------------------------------------------------------------------------------------- public void combine(final RecalDatum other) {
//
// increment methods
//
//---------------------------------------------------------------------------------------------------------------
public final void combine(final RecalDatum other) {
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors(); final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
this.increment(other.numObservations, other.numMismatches); this.increment(other.numObservations, other.numMismatches);
this.estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations); this.estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations);
this.empiricalQuality = -1.0; // reset the empirical quality calculation so we never have a wrongly calculated empirical quality stored
} }
//--------------------------------------------------------------------------------------------------------------- public final void calcCombinedEmpiricalQuality() {
// this.empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
// methods to derive empirical quality score
//
//---------------------------------------------------------------------------------------------------------------
public final void calcCombinedEmpiricalQuality(final int maxQual) {
this.empiricalQuality = empiricalQualDouble(SMOOTHING_CONSTANT, maxQual); // cache the value so we don't call log over and over again
} }
public final void calcEstimatedReportedQuality() { public final void calcEstimatedReportedQuality() {
this.estimatedQReported = -10 * Math.log10(calcExpectedErrors() / numObservations); this.estimatedQReported = -10 * Math.log10(calcExpectedErrors() / numObservations);
} }
//---------------------------------------------------------------------------------------------------------------
//
// misc. methods
//
//---------------------------------------------------------------------------------------------------------------
public final double getEstimatedQReported() { public final double getEstimatedQReported() {
return estimatedQReported; return estimatedQReported;
} }
public final double getEmpiricalQuality() { public final double getEmpiricalQuality() {
if (empiricalQuality < 0)
calcCombinedEmpiricalQuality();
return empiricalQuality; return empiricalQuality;
} }
private double calcExpectedErrors() {
return (double) this.numObservations * qualToErrorProb(estimatedQReported);
}
private double qualToErrorProb(final double qual) {
return Math.pow(10.0, qual / -10.0);
}
/** /**
* Makes a hard copy of the recal datum element * Makes a hard copy of the recal datum element
* *
* @return a new recal datum object with the same contents of this datum. * @return a new recal datum object with the same contents of this datum.
*/ */
protected RecalDatum copy() { public RecalDatum copy() {
return new RecalDatum(numObservations, numMismatches, estimatedQReported, empiricalQuality); return new RecalDatum(numObservations, numMismatches, estimatedQReported, empiricalQuality);
} }
@Override
public String toString() {
return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()));
}
private double calcExpectedErrors() {
return (double) this.numObservations * qualToErrorProb(estimatedQReported);
}
private double qualToErrorProb(final double qual) {
return Math.pow(10.0, qual / -10.0);
}
} }

View File

@ -30,7 +30,7 @@ import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import java.io.PrintStream; import java.io.File;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
@ -62,7 +62,7 @@ public class RecalibrationArgumentCollection {
*/ */
@Gather(BQSRGatherer.class) @Gather(BQSRGatherer.class)
@Output @Output
public PrintStream RECAL_FILE; public File RECAL_FILE;
/** /**
* List all implemented covariates. * List all implemented covariates.

View File

@ -18,13 +18,11 @@ import java.util.*;
*/ */
public class RecalibrationReport { public class RecalibrationReport {
private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done) private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done)
private LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager private final LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
private ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation
GATKReportTable argumentTable; // keep the argument table untouched just for output purposes private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter | todo -- this should be a new parameter, not necessarily coming from the original table parameter list private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter
private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check.";
public RecalibrationReport(final File RECAL_FILE) { public RecalibrationReport(final File RECAL_FILE) {
GATKReport report = new GATKReport(RECAL_FILE); GATKReport report = new GATKReport(RECAL_FILE);
@ -53,6 +51,7 @@ public class RecalibrationReport {
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES) int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES)
final String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check.";
if (nRequiredCovariates == 1) { // if there is only one required covariate, this is the read group table if (nRequiredCovariates == 1) { // if there is only one required covariate, this is the read group table
final GATKReportTable reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE); final GATKReportTable reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE);
table = parseReadGroupTable(keyManager, reportTable); table = parseReadGroupTable(keyManager, reportTable);
@ -292,7 +291,7 @@ public class RecalibrationReport {
public void calculateEmpiricalAndQuantizedQualities() { public void calculateEmpiricalAndQuantizedQualities() {
for (Map<BitSet, RecalDatum> table : keysAndTablesMap.values()) for (Map<BitSet, RecalDatum> table : keysAndTablesMap.values())
for (RecalDatum datum : table.values()) for (RecalDatum datum : table.values())
datum.calcCombinedEmpiricalQuality(QualityUtils.MAX_QUAL_SCORE); datum.calcCombinedEmpiricalQuality();
quantizationInfo = new QuantizationInfo(keysAndTablesMap, RAC.QUANTIZING_LEVELS); quantizationInfo = new QuantizationInfo(keysAndTablesMap, RAC.QUANTIZING_LEVELS);
} }

View File

@ -51,6 +51,7 @@ public class BaseRecalibration {
* Constructor using a GATK Report file * Constructor using a GATK Report file
* *
* @param RECAL_FILE a GATK Report file containing the recalibration information * @param RECAL_FILE a GATK Report file containing the recalibration information
* @param quantizationLevels number of bins to quantize the quality scores
*/ */
public BaseRecalibration(final File RECAL_FILE, int quantizationLevels) { public BaseRecalibration(final File RECAL_FILE, int quantizationLevels) {
RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE); RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE);
@ -80,7 +81,7 @@ public class BaseRecalibration {
for (int offset = 0; offset < read.getReadLength(); offset++) { // recalibrate all bases in the read for (int offset = 0; offset < read.getReadLength(); offset++) { // recalibrate all bases in the read
byte qualityScore = originalQuals[offset]; byte qualityScore = originalQuals[offset];
if (qualityScore > QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality) if (qualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
final BitSet[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model final BitSet[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model
qualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base qualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
} }

View File

@ -335,10 +335,11 @@ public class GATKSAMRecord extends BAMRecord {
/** /**
* Clears all attributes except ReadGroup of the read. * Clears all attributes except ReadGroup of the read.
*/ */
public void simplify () { public GATKSAMRecord simplify () {
GATKSAMReadGroupRecord rg = getReadGroup(); GATKSAMReadGroupRecord rg = getReadGroup();
this.clearAttributes(); this.clearAttributes();
setReadGroup(rg); setReadGroup(rg);
return this;
} }
/** /**

View File

@ -53,6 +53,17 @@ public class BQSRKeyManagerUnitTest {
createReadAndTest(covariates, nRequired); createReadAndTest(covariates, nRequired);
} }
@Test(enabled = true)
public void testOneCovariateWithOptionalCovariates() {
final int nRequired = 1;
final ArrayList<Covariate> covariates = new ArrayList<Covariate>(4);
covariates.add(new ReadGroupCovariate());
covariates.add(new QualityScoreCovariate());
covariates.add(new CycleCovariate());
covariates.add(new ContextCovariate());
createReadAndTest(covariates, nRequired);
}
private void createReadAndTest(List<Covariate> covariates, int nRequired) { private void createReadAndTest(List<Covariate> covariates, int nRequired) {
int readLength = 1000; int readLength = 1000;
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(ReadUtils.createRandomReadBases(readLength, true), ReadUtils.createRandomReadQuals(readLength), readLength + "M"); GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(ReadUtils.createRandomReadBases(readLength, true), ReadUtils.createRandomReadQuals(readLength), readLength + "M");