A bunch of BQSR changes: 1) by default we do not emit indel quals, but they can be turned on with --enable_indel_quals. 2) We check whether or not we are running in Lite mode (not done yet) and if so and the user is trying to recalibrate indels, we throw a User Error (not supported). 3) Like v1 we now allow the user to set the qual value below which we don't recalibrate (this was the remaining source of differences in the v1 vs. v2 plots).
This commit is contained in:
parent
d5b3a2eabf
commit
40618ac471
|
|
@ -104,10 +104,10 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
@DataProvider(name = "PRTest")
|
||||
public Object[][] createPRTestData() {
|
||||
return new Object[][]{
|
||||
{new PRTest("", "d2d6ed8667cdba7e56f5db97d6262676")},
|
||||
{new PRTest(" -qq -1", "b7053d3d67aba6d8892f0a60f0ded338")},
|
||||
{new PRTest(" -qq 6", "bfbf0855185b2b70aa35237fb71e4487")},
|
||||
{new PRTest(" -NIQ", "66aa65223f192ee39c1773aa187fd493")}
|
||||
{new PRTest("", "66aa65223f192ee39c1773aa187fd493")},
|
||||
{new PRTest(" -qq -1 -IQ", "b7053d3d67aba6d8892f0a60f0ded338")},
|
||||
{new PRTest(" -qq 6 -IQ", "bfbf0855185b2b70aa35237fb71e4487")},
|
||||
{new PRTest(" -IQ", "d2d6ed8667cdba7e56f5db97d6262676")}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -197,7 +197,17 @@ public class GenomeAnalysisEngine {
|
|||
private BaseRecalibration baseRecalibration = null;
|
||||
public BaseRecalibration getBaseRecalibration() { return baseRecalibration; }
|
||||
public boolean hasBaseRecalibration() { return baseRecalibration != null; }
|
||||
public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean noIndelQuals) { baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, noIndelQuals); }
|
||||
public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean useIndelQuals, final int preserveQLessThan) {
|
||||
baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, useIndelQuals, preserveQLessThan, isGATKLite());
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility method to determine whether this is the lite version of the GATK
|
||||
*/
|
||||
public boolean isGATKLite() {
|
||||
// TODO -- this is just a place holder for now
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Actually run the GATK with the specified walker.
|
||||
|
|
@ -209,8 +219,10 @@ public class GenomeAnalysisEngine {
|
|||
//monitor.start();
|
||||
setStartTime(new java.util.Date());
|
||||
|
||||
final GATKArgumentCollection args = this.getArguments();
|
||||
|
||||
// validate our parameters
|
||||
if (this.getArguments() == null) {
|
||||
if (args == null) {
|
||||
throw new ReviewedStingException("The GATKArgumentCollection passed to GenomeAnalysisEngine can not be null.");
|
||||
}
|
||||
|
||||
|
|
@ -218,16 +230,16 @@ public class GenomeAnalysisEngine {
|
|||
if (this.walker == null)
|
||||
throw new ReviewedStingException("The walker passed to GenomeAnalysisEngine can not be null.");
|
||||
|
||||
if (this.getArguments().nonDeterministicRandomSeed)
|
||||
if (args.nonDeterministicRandomSeed)
|
||||
resetRandomGenerator(System.currentTimeMillis());
|
||||
|
||||
// TODO -- REMOVE ME WHEN WE STOP BCF testing
|
||||
if ( this.getArguments().USE_SLOW_GENOTYPES )
|
||||
if ( args.USE_SLOW_GENOTYPES )
|
||||
GenotypeBuilder.MAKE_FAST_BY_DEFAULT = false;
|
||||
|
||||
// if the use specified an input BQSR recalibration table then enable on the fly recalibration
|
||||
if (this.getArguments().BQSR_RECAL_FILE != null)
|
||||
setBaseRecalibration(this.getArguments().BQSR_RECAL_FILE, this.getArguments().quantizationLevels, this.getArguments().noIndelQuals);
|
||||
if (args.BQSR_RECAL_FILE != null)
|
||||
setBaseRecalibration(args.BQSR_RECAL_FILE, args.quantizationLevels, args.enableIndelQuals, args.PRESERVE_QSCORES_LESS_THAN);
|
||||
|
||||
// Determine how the threads should be divided between CPU vs. IO.
|
||||
determineThreadAllocation();
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ import org.broadinstitute.sting.gatk.DownsampleType;
|
|||
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||
import org.broadinstitute.sting.gatk.samples.PedigreeValidationType;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||
|
|
@ -190,14 +191,18 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName="useOriginalQualities", shortName = "OQ", doc = "If set, use the original base quality scores from the OQ tag when present instead of the standard scores", required=false)
|
||||
public Boolean useOriginalBaseQualities = false;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// BQSR arguments
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* After the header, data records occur one per line until the end of the file. The first several items on a line are the
|
||||
* values of the individual covariates and will change depending on which covariates were specified at runtime. The last
|
||||
* three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
|
||||
* and the raw empirical quality score calculated by phred-scaling the mismatch rate.
|
||||
* Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool.
|
||||
* Please be aware that one should only run recalibration with the covariates file created on the same input bam(s).
|
||||
*/
|
||||
@Input(fullName="BQSR", shortName="BQSR", required=false, doc="Filename for the input covariates table recalibration .csv file which enables on the fly base quality score recalibration")
|
||||
public File BQSR_RECAL_FILE = null; // BUGBUG: need a better argument name once we decide how BQSRs v1 and v2 will live in the code base simultaneously
|
||||
@Input(fullName="BQSR", shortName="BQSR", required=false, doc="The input covariates table file which enables on-the-fly base quality score recalibration")
|
||||
public File BQSR_RECAL_FILE = null;
|
||||
|
||||
/**
|
||||
* Turns on the base quantization module. It requires a recalibration report (-BQSR).
|
||||
|
|
@ -206,18 +211,34 @@ public class GATKArgumentCollection {
|
|||
* Any value greater than zero will be used to recalculate the quantization using that many levels.
|
||||
* Negative values mean that we should quantize using the recalibration report's quantization level.
|
||||
*/
|
||||
@Argument(fullName="quantize_quals", shortName = "qq", doc = "Quantize quality scores to a given number of levels.", required=false)
|
||||
@Argument(fullName="quantize_quals", shortName = "qq", doc = "Quantize quality scores to a given number of levels (with -BQSR)", required=false)
|
||||
public int quantizationLevels = 0;
|
||||
|
||||
/**
|
||||
* Turns off printing of the base insertion and base deletion tags when using the -BQSR argument. Only the base substitution qualities will be produced.
|
||||
* Turns on printing of the base insertion and base deletion tags when using the -BQSR argument. By default, only the base substitution qualities will be produced.
|
||||
*/
|
||||
@Argument(fullName="no_indel_quals", shortName = "NIQ", doc = "If true, inhibits printing of base insertion and base deletion tags.", required=false)
|
||||
public boolean noIndelQuals = false;
|
||||
@Argument(fullName="enable_indel_quals", shortName = "IQ", doc = "If true, enables printing of base insertion and base deletion tags (with -BQSR)", required=false)
|
||||
public boolean enableIndelQuals = false;
|
||||
|
||||
/**
|
||||
* Do not modify quality scores less than this value but rather just write them out unmodified in the recalibrated BAM file.
|
||||
* In general it's unsafe to change qualities scores below < 6, since base callers use these values to indicate random or bad bases.
|
||||
* For example, Illumina writes Q2 bases when the machine has really gone wrong. This would be fine in and of itself,
|
||||
* but when you select a subset of these reads based on their ability to align to the reference and their dinucleotide effect,
|
||||
* your Q2 bin can be elevated to Q8 or Q10, leading to issues downstream.
|
||||
*/
|
||||
@Argument(fullName = "preserve_qscores_less_than", shortName = "preserveQ", doc = "Bases with quality scores less than this threshold won't be recalibrated (with -BQSR)", required = false)
|
||||
public int PRESERVE_QSCORES_LESS_THAN = QualityUtils.MIN_USABLE_Q_SCORE;
|
||||
|
||||
@Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false)
|
||||
public byte defaultBaseQualities = -1;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Other utility arguments
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
|
||||
public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;
|
||||
|
||||
|
|
|
|||
|
|
@ -78,6 +78,12 @@ public class UserException extends ReviewedStingException {
|
|||
}
|
||||
}
|
||||
|
||||
public static class NotSupportedInGATKLite extends UserException {
|
||||
public NotSupportedInGATKLite(String argument) {
|
||||
super(String.format("Unfortunately, the argument %s is not supported in the Lite version of the GATK", argument));
|
||||
}
|
||||
}
|
||||
|
||||
// todo -- fix up exception cause passing
|
||||
public static class MissingArgument extends CommandLineException {
|
||||
public MissingArgument(String arg, String message) {
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -49,7 +50,8 @@ public class BaseRecalibration {
|
|||
private final RecalibrationTables recalibrationTables;
|
||||
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
||||
|
||||
private final boolean noIndelQuals;
|
||||
private final boolean emitIndelQuals;
|
||||
private final int preserveQLessThan;
|
||||
|
||||
private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
||||
static {
|
||||
|
|
@ -62,9 +64,15 @@ public class BaseRecalibration {
|
|||
*
|
||||
* @param RECAL_FILE a GATK Report file containing the recalibration information
|
||||
* @param quantizationLevels number of bins to quantize the quality scores
|
||||
* @param noIndelQuals if true, do not emit base indel qualities
|
||||
* @param emitIndelQuals if true, emit base indel qualities
|
||||
* @param preserveQLessThan preserve quality scores less than this value
|
||||
* @param isGATKLite is this being called from the full or Lite version of the GATK
|
||||
*/
|
||||
public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean noIndelQuals) {
|
||||
public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean emitIndelQuals, final int preserveQLessThan, final boolean isGATKLite) {
|
||||
// check for unsupported access
|
||||
if (isGATKLite && emitIndelQuals)
|
||||
throw new UserException.NotSupportedInGATKLite("enable_indel_quals");
|
||||
|
||||
RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE);
|
||||
|
||||
recalibrationTables = recalibrationReport.getRecalibrationTables();
|
||||
|
|
@ -76,22 +84,8 @@ public class BaseRecalibration {
|
|||
quantizationInfo.quantizeQualityScores(quantizationLevels);
|
||||
|
||||
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||
this.noIndelQuals = noIndelQuals;
|
||||
}
|
||||
|
||||
/**
|
||||
* This constructor only exists for testing purposes.
|
||||
*
|
||||
* @param quantizationInfo the quantization info object
|
||||
* @param recalibrationTables the map of key managers and recalibration tables
|
||||
* @param requestedCovariates the list of requested covariates
|
||||
*/
|
||||
protected BaseRecalibration(final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) {
|
||||
this.quantizationInfo = quantizationInfo;
|
||||
this.recalibrationTables = recalibrationTables;
|
||||
this.requestedCovariates = requestedCovariates;
|
||||
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||
noIndelQuals = false;
|
||||
this.emitIndelQuals = emitIndelQuals;
|
||||
this.preserveQLessThan = preserveQLessThan;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -104,7 +98,7 @@ public class BaseRecalibration {
|
|||
public void recalibrateRead(final GATKSAMRecord read) {
|
||||
RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read
|
||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||
if (noIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {
|
||||
if (!emitIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {
|
||||
read.setBaseQualities(null, errorModel);
|
||||
continue;
|
||||
}
|
||||
|
|
@ -117,7 +111,7 @@ public class BaseRecalibration {
|
|||
|
||||
final byte originalQualityScore = quals[offset];
|
||||
|
||||
if (originalQualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
||||
if (originalQualityScore >= preserveQLessThan) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
||||
final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model
|
||||
final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
|
||||
quals[offset] = recalibratedQualityScore;
|
||||
|
|
|
|||
Loading…
Reference in New Issue