Merge branch 'master' of ssh://copper.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
e3bc1e8630
|
|
@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers;
|
|||
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
|
|
@ -91,7 +90,7 @@ import java.util.TreeSet;
|
|||
*/
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||
public class PrintReadsWalker extends ReadWalker<GATKSAMRecord, SAMFileWriter> {
|
||||
|
||||
@Output(doc="Write output to this BAM filename instead of STDOUT")
|
||||
SAMFileWriter out;
|
||||
|
|
@ -129,6 +128,13 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
@Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false)
|
||||
public Set<String> sampleNames = new TreeSet<String>();
|
||||
|
||||
/**
|
||||
* Erase all extra attributes in the read but keep the read group information
|
||||
*/
|
||||
@Argument(fullName="simplify", shortName="s", doc="Simplify all reads.", required=false)
|
||||
public boolean simplifyReads = false;
|
||||
|
||||
|
||||
private TreeSet<String> samplesToChoose = new TreeSet<String>();
|
||||
private boolean SAMPLES_SPECIFIED = false;
|
||||
|
||||
|
|
@ -162,7 +168,7 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
* The reads filter function.
|
||||
*
|
||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||
* @param read the read itself, as a SAMRecord
|
||||
* @param read the read itself, as a GATKSAMRecord
|
||||
* @return true if the read passes the filter, false if it doesn't
|
||||
*/
|
||||
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
|
||||
|
|
@ -208,11 +214,11 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
* The reads map function.
|
||||
*
|
||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||
* @param read the read itself, as a SAMRecord
|
||||
* @param read the read itself, as a GATKSAMRecord
|
||||
* @return the read itself
|
||||
*/
|
||||
public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
||||
return read;
|
||||
public GATKSAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
||||
return simplifyReads ? read.simplify() : read;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -232,7 +238,7 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
* @param output the output source
|
||||
* @return the SAMFileWriter, so that the next reduce can emit to the same source
|
||||
*/
|
||||
public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) {
|
||||
public SAMFileWriter reduce( GATKSAMRecord read, SAMFileWriter output ) {
|
||||
output.addAlignment(read);
|
||||
return output;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,52 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Short one line description of the walker.
|
||||
*
|
||||
* <p> [Long description of the walker] </p>
|
||||
*
|
||||
*
|
||||
* <h2>Input</h2> <p> [Description of the Input] </p>
|
||||
*
|
||||
* <h2>Output</h2> <p> [Description of the Output] </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T [walker name]
|
||||
* </pre>
|
||||
*
|
||||
* @author Mauricio Carneiro
|
||||
* @since 4/17/12
|
||||
*/
|
||||
public class AccuracyDatum extends RecalDatum {
|
||||
private final List<Double> accuracy = new LinkedList<Double>();
|
||||
private final List<Byte> reportedQualities = new LinkedList<Byte>();
|
||||
|
||||
public AccuracyDatum(final RecalDatum recalDatum, final byte originalQuality) {
|
||||
super(recalDatum);
|
||||
accuracy.add(calculateAccuracy(recalDatum, originalQuality));
|
||||
reportedQualities.add(originalQuality);
|
||||
}
|
||||
|
||||
public void combine(final RecalDatum recalDatum, final byte originalQuality) {
|
||||
this.combine(recalDatum);
|
||||
accuracy.add(calculateAccuracy(recalDatum, originalQuality));
|
||||
reportedQualities.add(originalQuality);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%s,%.2f,%.2f", super.toString(), MathUtils.average(reportedQualities), MathUtils.average(accuracy));
|
||||
}
|
||||
|
||||
private static double calculateAccuracy(final RecalDatum recalDatum, final byte originalQuality) {
|
||||
return recalDatum.getEmpiricalQuality() - originalQuality;
|
||||
}
|
||||
}
|
||||
|
|
@ -52,7 +52,7 @@ public class BQSRKeyManager {
|
|||
for (Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management
|
||||
int nBits = required.numberOfBits(); // number of bits used by this covariate
|
||||
BitSet mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
|
||||
this.requiredCovariates.add(new RequiredCovariateInfo(nRequiredBits, nBits, mask, required)); // Create an object for this required covariate
|
||||
this.requiredCovariates.add(new RequiredCovariateInfo(nRequiredBits, mask, required)); // Create an object for this required covariate
|
||||
nRequiredBits += nBits;
|
||||
}
|
||||
|
||||
|
|
@ -184,7 +184,7 @@ public class BQSRKeyManager {
|
|||
* @return an object array with the values for each key
|
||||
*/
|
||||
public List<Object> keySetFrom(BitSet key) {
|
||||
List<Object> objectKeys = new ArrayList<Object>();
|
||||
List<Object> objectKeys = new LinkedList<Object>();
|
||||
for (RequiredCovariateInfo info : requiredCovariates) {
|
||||
BitSet covariateBitSet = extractBitSetFromKey(key, info.mask, info.bitsBefore); // get the covariate's bitset
|
||||
objectKeys.add(info.covariate.keyFromBitSet(covariateBitSet)); // convert the bitset to object using covariate's interface
|
||||
|
|
@ -286,7 +286,7 @@ public class BQSRKeyManager {
|
|||
public final BitSet mask; // the mask to pull out this covariate from the combined bitset key ( a mask made from bitsBefore and nBits )
|
||||
public final Covariate covariate; // this allows reverse lookup of the Covariates in order
|
||||
|
||||
RequiredCovariateInfo(int bitsBefore, int nBits, BitSet mask, Covariate covariate) {
|
||||
RequiredCovariateInfo(int bitsBefore, BitSet mask, Covariate covariate) {
|
||||
this.bitsBefore = bitsBefore;
|
||||
this.mask = mask;
|
||||
this.covariate = covariate;
|
||||
|
|
|
|||
|
|
@ -47,9 +47,6 @@ public class ContextCovariate implements StandardCovariate {
|
|||
private int insertionsContextSize;
|
||||
private int deletionsContextSize;
|
||||
|
||||
private final BitSet NO_CONTEXT_BITSET = BitSetUtils.bitSetFrom(-1L);
|
||||
// protected final String NO_CONTEXT_VALUE = "N"; // protected so we can UNIT TEST it
|
||||
|
||||
private byte LOW_QUAL_TAIL;
|
||||
|
||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
|
|
|
|||
|
|
@ -14,9 +14,9 @@ import java.util.BitSet;
|
|||
* @since 2/8/12
|
||||
*/
|
||||
public class CovariateValues {
|
||||
private BitSet[] mismatches;
|
||||
private BitSet[] insertions;
|
||||
private BitSet[] deletions;
|
||||
private final BitSet[] mismatches;
|
||||
private final BitSet[] insertions;
|
||||
private final BitSet[] deletions;
|
||||
|
||||
public CovariateValues(BitSet[] mismatch, BitSet[] insertion, BitSet[] deletion) {
|
||||
this.mismatches = mismatch;
|
||||
|
|
|
|||
|
|
@ -2,8 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
|
|
@ -38,10 +36,13 @@ import java.util.List;
|
|||
* Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
|
||||
*/
|
||||
|
||||
public class RecalDatumOptimized {
|
||||
public class Datum {
|
||||
|
||||
long numObservations; // number of bases seen in total
|
||||
long numMismatches; // number of bases seen that didn't match the reference
|
||||
|
||||
private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
|
||||
|
||||
protected long numObservations; // number of bases seen in total
|
||||
protected long numMismatches; // number of bases seen that didn't match the reference
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -49,67 +50,43 @@ public class RecalDatumOptimized {
|
|||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public RecalDatumOptimized() {
|
||||
public Datum() {
|
||||
numObservations = 0L;
|
||||
numMismatches = 0L;
|
||||
}
|
||||
|
||||
public RecalDatumOptimized(final long _numObservations, final long _numMismatches) {
|
||||
numObservations = _numObservations;
|
||||
numMismatches = _numMismatches;
|
||||
}
|
||||
|
||||
public RecalDatumOptimized(final RecalDatumOptimized copy) {
|
||||
this.numObservations = copy.numObservations;
|
||||
this.numMismatches = copy.numMismatches;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// increment methods
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public synchronized final void increment(final long incObservations, final long incMismatches) {
|
||||
synchronized void increment(final long incObservations, final long incMismatches) {
|
||||
numObservations += incObservations;
|
||||
numMismatches += incMismatches;
|
||||
}
|
||||
|
||||
public synchronized final void increment(final RecalDatumOptimized other) {
|
||||
increment(other.numObservations, other.numMismatches);
|
||||
}
|
||||
|
||||
public synchronized final void increment(final List<RecalDatumOptimized> data) {
|
||||
for (RecalDatumOptimized other : data) {
|
||||
this.increment(other);
|
||||
}
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// methods to derive empirical quality score
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public final double empiricalQualDouble(final int smoothing, final double maxQual) {
|
||||
final double doubleMismatches = (double) (numMismatches + smoothing);
|
||||
final double doubleObservations = (double) (numObservations + smoothing);
|
||||
double empiricalQualDouble() {
|
||||
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
|
||||
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT);
|
||||
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
|
||||
return Math.min(empiricalQual, maxQual);
|
||||
return Math.min(empiricalQual, (double) QualityUtils.MAX_QUAL_SCORE);
|
||||
}
|
||||
|
||||
public final byte empiricalQualByte(final int smoothing) {
|
||||
final double doubleMismatches = (double) (numMismatches + smoothing);
|
||||
final double doubleObservations = (double) (numObservations + smoothing);
|
||||
return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40
|
||||
}
|
||||
|
||||
public final byte empiricalQualByte() {
|
||||
return empiricalQualByte(0); // 'default' behavior is to use smoothing value of zero
|
||||
byte empiricalQualByte() {
|
||||
final double doubleMismatches = (double) (numMismatches);
|
||||
final double doubleObservations = (double) (numObservations);
|
||||
return QualityUtils.probToQual(1.0 - doubleMismatches / doubleObservations); // This is capped at Q40
|
||||
}
|
||||
|
||||
@Override
|
||||
public final String toString() {
|
||||
public String toString() {
|
||||
return String.format("%d,%d,%d", numObservations, numMismatches, (int) empiricalQualByte());
|
||||
}
|
||||
|
||||
|
|
@ -7,8 +7,8 @@ public enum EventType {
|
|||
BASE_INSERTION(1, "I"),
|
||||
BASE_DELETION(2, "D");
|
||||
|
||||
public int index;
|
||||
public String representation;
|
||||
public final int index;
|
||||
private final String representation;
|
||||
|
||||
private EventType(int index, String representation) {
|
||||
this.index = index;
|
||||
|
|
|
|||
|
|
@ -19,9 +19,9 @@ import java.util.Map;
|
|||
public class QuantizationInfo {
|
||||
private List<Byte> quantizedQuals;
|
||||
private List<Long> empiricalQualCounts;
|
||||
int quantizationLevels;
|
||||
private int quantizationLevels;
|
||||
|
||||
public QuantizationInfo(List<Byte> quantizedQuals, List<Long> empiricalQualCounts, int quantizationLevels) {
|
||||
private QuantizationInfo(List<Byte> quantizedQuals, List<Long> empiricalQualCounts, int quantizationLevels) {
|
||||
this.quantizedQuals = quantizedQuals;
|
||||
this.empiricalQualCounts = empiricalQualCounts;
|
||||
this.quantizationLevels = quantizationLevels;
|
||||
|
|
|
|||
|
|
@ -13,9 +13,9 @@ import java.util.BitSet;
|
|||
* @since 2/8/12
|
||||
*/
|
||||
public class ReadCovariates {
|
||||
private BitSet[][] mismatchesKeySet;
|
||||
private BitSet[][] insertionsKeySet;
|
||||
private BitSet[][] deletionsKeySet;
|
||||
private final BitSet[][] mismatchesKeySet;
|
||||
private final BitSet[][] insertionsKeySet;
|
||||
private final BitSet[][] deletionsKeySet;
|
||||
|
||||
private int nextCovariateIndex;
|
||||
|
||||
|
|
|
|||
|
|
@ -38,7 +38,6 @@ import org.broadinstitute.sting.utils.collections.Pair;
|
|||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
@ -221,7 +220,7 @@ public class RecalDataManager {
|
|||
logger.info("");
|
||||
}
|
||||
|
||||
public static List<GATKReportTable> generateReportTables(Map<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap) {
|
||||
private static List<GATKReportTable> generateReportTables(Map<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap) {
|
||||
List<GATKReportTable> result = new LinkedList<GATKReportTable>();
|
||||
int tableIndex = 0;
|
||||
|
||||
|
|
@ -349,7 +348,7 @@ public class RecalDataManager {
|
|||
* @param read The SAMRecord to parse
|
||||
* @return whether or not this read should be skipped
|
||||
*/
|
||||
public static boolean checkColorSpace(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
|
||||
public static boolean isColorSpaceConsistent(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) {
|
||||
if (ReadUtils.isSOLiDRead(read)) { // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
|
||||
if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
|
||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
|
||||
|
|
@ -378,99 +377,10 @@ public class RecalDataManager {
|
|||
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
|
||||
|
||||
else
|
||||
return false; // otherwise, just skip the read
|
||||
return true; // otherwise, just skip the read
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse through the color space of the read and apply the desired --solid_recal_mode correction to the bases
|
||||
* This method doesn't add the inconsistent tag to the read like parseColorSpace does
|
||||
*
|
||||
* @param read The SAMRecord to parse
|
||||
* @param originalQualScores The array of original quality scores to modify during the correction
|
||||
* @param solidRecalMode Which mode of solid recalibration to apply
|
||||
* @param refBases The reference for this read
|
||||
* @return A new array of quality scores that have been ref bias corrected
|
||||
*/
|
||||
public static byte[] calcColorSpace(final GATKSAMRecord read, byte[] originalQualScores, final SOLID_RECAL_MODE solidRecalMode, final byte[] refBases) {
|
||||
|
||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
|
||||
if (attr != null) {
|
||||
byte[] colorSpace;
|
||||
if (attr instanceof String) {
|
||||
colorSpace = ((String) attr).getBytes();
|
||||
}
|
||||
else {
|
||||
throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
|
||||
}
|
||||
|
||||
// Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
|
||||
byte[] readBases = read.getReadBases();
|
||||
final byte[] colorImpliedBases = readBases.clone();
|
||||
byte[] refBasesDirRead = AlignmentUtils.alignmentToByteArray(read.getCigar(), read.getReadBases(), refBases); //BUGBUG: This needs to change when read walkers are changed to give the aligned refBases
|
||||
if (read.getReadNegativeStrandFlag()) {
|
||||
readBases = BaseUtils.simpleReverseComplement(read.getReadBases());
|
||||
refBasesDirRead = BaseUtils.simpleReverseComplement(refBasesDirRead.clone());
|
||||
}
|
||||
final int[] inconsistency = new int[readBases.length];
|
||||
byte prevBase = colorSpace[0]; // The sentinel
|
||||
for (int iii = 0; iii < readBases.length; iii++) {
|
||||
final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[iii + 1]);
|
||||
colorImpliedBases[iii] = thisBase;
|
||||
inconsistency[iii] = (thisBase == readBases[iii] ? 0 : 1);
|
||||
prevBase = readBases[iii];
|
||||
}
|
||||
|
||||
// Now that we have the inconsistency array apply the desired correction to the inconsistent bases
|
||||
if (solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO) { // Set inconsistent bases and the one before it to Q0
|
||||
final boolean setBaseN = false;
|
||||
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
|
||||
}
|
||||
else if (solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO_BASE_N) {
|
||||
final boolean setBaseN = true;
|
||||
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
|
||||
}
|
||||
else if (solidRecalMode == SOLID_RECAL_MODE.REMOVE_REF_BIAS) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases
|
||||
solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead);
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
|
||||
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
|
||||
}
|
||||
|
||||
return originalQualScores;
|
||||
}
|
||||
|
||||
public static boolean checkNoCallColorSpace(final GATKSAMRecord read) {
|
||||
if (ReadUtils.isSOLiDRead(read)) {
|
||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
|
||||
if (attr != null) {
|
||||
byte[] colorSpace;
|
||||
if (attr instanceof String) {
|
||||
colorSpace = ((String) attr).substring(1).getBytes(); // trim off the Sentinel
|
||||
}
|
||||
else {
|
||||
throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
|
||||
}
|
||||
|
||||
for (byte color : colorSpace) {
|
||||
if (color != (byte) '0' && color != (byte) '1' && color != (byte) '2' && color != (byte) '3') {
|
||||
return true; // There is a bad color in this SOLiD read and the user wants to skip over it
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
|
||||
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
|
||||
}
|
||||
}
|
||||
|
||||
return false; // There aren't any color no calls in this SOLiD read
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -625,16 +535,16 @@ public class RecalDataManager {
|
|||
* @param offset The offset in the read at which to check
|
||||
* @return Returns true if the base was inconsistent with the color space
|
||||
*/
|
||||
public static boolean isInconsistentColorSpace(final GATKSAMRecord read, final int offset) {
|
||||
public static boolean isColorSpaceConsistent(final GATKSAMRecord read, final int offset) {
|
||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG);
|
||||
if (attr != null) {
|
||||
final byte[] inconsistency = (byte[]) attr;
|
||||
// NOTE: The inconsistency array is in the direction of the read, not aligned to the reference!
|
||||
if (read.getReadNegativeStrandFlag()) { // Negative direction
|
||||
return inconsistency[inconsistency.length - offset - 1] != (byte) 0;
|
||||
return inconsistency[inconsistency.length - offset - 1] == (byte) 0;
|
||||
}
|
||||
else { // Forward direction
|
||||
return inconsistency[offset] != (byte) 0;
|
||||
return inconsistency[offset] == (byte) 0;
|
||||
}
|
||||
|
||||
// This block of code is for if you want to check both the offset and the next base for color space inconsistency
|
||||
|
|
@ -654,7 +564,7 @@ public class RecalDataManager {
|
|||
|
||||
}
|
||||
else { // No inconsistency array, so nothing is inconsistent
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -33,12 +33,11 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
* An individual piece of recalibration data. Each bin counts up the number of observations and the number of reference mismatches seen for that combination of covariates.
|
||||
*/
|
||||
|
||||
public class RecalDatum extends RecalDatumOptimized {
|
||||
public class RecalDatum extends Datum {
|
||||
|
||||
private double estimatedQReported; // estimated reported quality score based on combined data's individual q-reporteds and number of observations
|
||||
private double empiricalQuality; // the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
|
||||
|
||||
private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -50,7 +49,7 @@ public class RecalDatum extends RecalDatumOptimized {
|
|||
numObservations = 0L;
|
||||
numMismatches = 0L;
|
||||
estimatedQReported = 0.0;
|
||||
empiricalQuality = 0.0;
|
||||
empiricalQuality = -1.0;
|
||||
}
|
||||
|
||||
public RecalDatum(final long _numObservations, final long _numMismatches, final double _estimatedQReported, final double _empiricalQuality) {
|
||||
|
|
@ -67,60 +66,52 @@ public class RecalDatum extends RecalDatumOptimized {
|
|||
this.empiricalQuality = copy.empiricalQuality;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// increment methods
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public final void combine(final RecalDatum other) {
|
||||
public void combine(final RecalDatum other) {
|
||||
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
|
||||
this.increment(other.numObservations, other.numMismatches);
|
||||
this.estimatedQReported = -10 * Math.log10(sumErrors / this.numObservations);
|
||||
this.empiricalQuality = -1.0; // reset the empirical quality calculation so we never have a wrongly calculated empirical quality stored
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// methods to derive empirical quality score
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public final void calcCombinedEmpiricalQuality(final int maxQual) {
|
||||
this.empiricalQuality = empiricalQualDouble(SMOOTHING_CONSTANT, maxQual); // cache the value so we don't call log over and over again
|
||||
public final void calcCombinedEmpiricalQuality() {
|
||||
this.empiricalQuality = empiricalQualDouble(); // cache the value so we don't call log over and over again
|
||||
}
|
||||
|
||||
public final void calcEstimatedReportedQuality() {
|
||||
this.estimatedQReported = -10 * Math.log10(calcExpectedErrors() / numObservations);
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// misc. methods
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public final double getEstimatedQReported() {
|
||||
return estimatedQReported;
|
||||
}
|
||||
|
||||
public final double getEmpiricalQuality() {
|
||||
if (empiricalQuality < 0)
|
||||
calcCombinedEmpiricalQuality();
|
||||
return empiricalQuality;
|
||||
}
|
||||
|
||||
private double calcExpectedErrors() {
|
||||
return (double) this.numObservations * qualToErrorProb(estimatedQReported);
|
||||
}
|
||||
|
||||
private double qualToErrorProb(final double qual) {
|
||||
return Math.pow(10.0, qual / -10.0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Makes a hard copy of the recal datum element
|
||||
*
|
||||
* @return a new recal datum object with the same contents of this datum.
|
||||
*/
|
||||
protected RecalDatum copy() {
|
||||
public RecalDatum copy() {
|
||||
return new RecalDatum(numObservations, numMismatches, estimatedQReported, empiricalQuality);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%d,%d,%d", numObservations, numMismatches, (byte) Math.floor(getEmpiricalQuality()));
|
||||
}
|
||||
|
||||
|
||||
private double calcExpectedErrors() {
|
||||
return (double) this.numObservations * qualToErrorProb(estimatedQReported);
|
||||
}
|
||||
|
||||
private double qualToErrorProb(final double qual) {
|
||||
return Math.pow(10.0, qual / -10.0);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ import org.broadinstitute.sting.commandline.*;
|
|||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.io.File;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -62,7 +62,7 @@ public class RecalibrationArgumentCollection {
|
|||
*/
|
||||
@Gather(BQSRGatherer.class)
|
||||
@Output
|
||||
public PrintStream RECAL_FILE;
|
||||
public File RECAL_FILE;
|
||||
|
||||
/**
|
||||
* List all implemented covariates.
|
||||
|
|
|
|||
|
|
@ -18,13 +18,11 @@ import java.util.*;
|
|||
*/
|
||||
public class RecalibrationReport {
|
||||
private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done)
|
||||
private LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
|
||||
private ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation
|
||||
private final LinkedHashMap<BQSRKeyManager, Map<BitSet, RecalDatum>> keysAndTablesMap; // quick access reference to the read group table and its key manager
|
||||
private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // list of all covariates to be used in this calculation
|
||||
|
||||
GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
|
||||
RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter | todo -- this should be a new parameter, not necessarily coming from the original table parameter list
|
||||
|
||||
private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check.";
|
||||
private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
|
||||
private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter
|
||||
|
||||
public RecalibrationReport(final File RECAL_FILE) {
|
||||
GATKReport report = new GATKReport(RECAL_FILE);
|
||||
|
|
@ -53,6 +51,7 @@ public class RecalibrationReport {
|
|||
final BQSRKeyManager keyManager = new BQSRKeyManager(requiredCovariatesToAdd, optionalCovariatesToAdd); // initializing it's corresponding key manager
|
||||
|
||||
int nRequiredCovariates = requiredCovariatesToAdd.size(); // the number of required covariates defines which table we are looking at (RG, QUAL or ALL_COVARIATES)
|
||||
final String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check.";
|
||||
if (nRequiredCovariates == 1) { // if there is only one required covariate, this is the read group table
|
||||
final GATKReportTable reportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE);
|
||||
table = parseReadGroupTable(keyManager, reportTable);
|
||||
|
|
@ -292,7 +291,7 @@ public class RecalibrationReport {
|
|||
public void calculateEmpiricalAndQuantizedQualities() {
|
||||
for (Map<BitSet, RecalDatum> table : keysAndTablesMap.values())
|
||||
for (RecalDatum datum : table.values())
|
||||
datum.calcCombinedEmpiricalQuality(QualityUtils.MAX_QUAL_SCORE);
|
||||
datum.calcCombinedEmpiricalQuality();
|
||||
|
||||
quantizationInfo = new QuantizationInfo(keysAndTablesMap, RAC.QUANTIZING_LEVELS);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ public class BaseRecalibration {
|
|||
* Constructor using a GATK Report file
|
||||
*
|
||||
* @param RECAL_FILE a GATK Report file containing the recalibration information
|
||||
* @param quantizationLevels number of bins to quantize the quality scores
|
||||
*/
|
||||
public BaseRecalibration(final File RECAL_FILE, int quantizationLevels) {
|
||||
RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE);
|
||||
|
|
@ -80,7 +81,7 @@ public class BaseRecalibration {
|
|||
for (int offset = 0; offset < read.getReadLength(); offset++) { // recalibrate all bases in the read
|
||||
byte qualityScore = originalQuals[offset];
|
||||
|
||||
if (qualityScore > QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
||||
if (qualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
||||
final BitSet[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model
|
||||
qualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
|
||||
}
|
||||
|
|
|
|||
|
|
@ -335,10 +335,11 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
/**
|
||||
* Clears all attributes except ReadGroup of the read.
|
||||
*/
|
||||
public void simplify () {
|
||||
public GATKSAMRecord simplify () {
|
||||
GATKSAMReadGroupRecord rg = getReadGroup();
|
||||
this.clearAttributes();
|
||||
setReadGroup(rg);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -53,6 +53,17 @@ public class BQSRKeyManagerUnitTest {
|
|||
createReadAndTest(covariates, nRequired);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testOneCovariateWithOptionalCovariates() {
|
||||
final int nRequired = 1;
|
||||
final ArrayList<Covariate> covariates = new ArrayList<Covariate>(4);
|
||||
covariates.add(new ReadGroupCovariate());
|
||||
covariates.add(new QualityScoreCovariate());
|
||||
covariates.add(new CycleCovariate());
|
||||
covariates.add(new ContextCovariate());
|
||||
createReadAndTest(covariates, nRequired);
|
||||
}
|
||||
|
||||
private void createReadAndTest(List<Covariate> covariates, int nRequired) {
|
||||
int readLength = 1000;
|
||||
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(ReadUtils.createRandomReadBases(readLength, true), ReadUtils.createRandomReadQuals(readLength), readLength + "M");
|
||||
|
|
|
|||
Loading…
Reference in New Issue