diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index ccc9084b4..5d472189a 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -104,10 +104,10 @@ public class BQSRIntegrationTest extends WalkerTest { @DataProvider(name = "PRTest") public Object[][] createPRTestData() { return new Object[][]{ - {new PRTest("", "d2d6ed8667cdba7e56f5db97d6262676")}, - {new PRTest(" -qq -1", "b7053d3d67aba6d8892f0a60f0ded338")}, - {new PRTest(" -qq 6", "bfbf0855185b2b70aa35237fb71e4487")}, - {new PRTest(" -NIQ", "66aa65223f192ee39c1773aa187fd493")} + {new PRTest("", "66aa65223f192ee39c1773aa187fd493")}, + {new PRTest(" -qq -1 -IQ", "b7053d3d67aba6d8892f0a60f0ded338")}, + {new PRTest(" -qq 6 -IQ", "bfbf0855185b2b70aa35237fb71e4487")}, + {new PRTest(" -IQ", "d2d6ed8667cdba7e56f5db97d6262676")} }; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index e19a3c613..ef54e7cc6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -197,7 +197,17 @@ public class GenomeAnalysisEngine { private BaseRecalibration baseRecalibration = null; public BaseRecalibration getBaseRecalibration() { return baseRecalibration; } public boolean hasBaseRecalibration() { return baseRecalibration != null; } - public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean noIndelQuals) { baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, noIndelQuals); } + public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean useIndelQuals, final int preserveQLessThan) { + baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, useIndelQuals, preserveQLessThan, isGATKLite()); + } + + /** + * Utility method to determine whether this is the lite version of the GATK + */ + public boolean isGATKLite() { + // TODO -- this is just a place holder for now + return false; + } /** * Actually run the GATK with the specified walker. @@ -209,8 +219,10 @@ public class GenomeAnalysisEngine { //monitor.start(); setStartTime(new java.util.Date()); + final GATKArgumentCollection args = this.getArguments(); + // validate our parameters - if (this.getArguments() == null) { + if (args == null) { throw new ReviewedStingException("The GATKArgumentCollection passed to GenomeAnalysisEngine can not be null."); } @@ -218,16 +230,16 @@ public class GenomeAnalysisEngine { if (this.walker == null) throw new ReviewedStingException("The walker passed to GenomeAnalysisEngine can not be null."); - if (this.getArguments().nonDeterministicRandomSeed) + if (args.nonDeterministicRandomSeed) resetRandomGenerator(System.currentTimeMillis()); // TODO -- REMOVE ME WHEN WE STOP BCF testing - if ( this.getArguments().USE_SLOW_GENOTYPES ) + if ( args.USE_SLOW_GENOTYPES ) GenotypeBuilder.MAKE_FAST_BY_DEFAULT = false; // if the use specified an input BQSR recalibration table then enable on the fly recalibration - if (this.getArguments().BQSR_RECAL_FILE != null) - setBaseRecalibration(this.getArguments().BQSR_RECAL_FILE, this.getArguments().quantizationLevels, this.getArguments().noIndelQuals); + if (args.BQSR_RECAL_FILE != null) + setBaseRecalibration(args.BQSR_RECAL_FILE, args.quantizationLevels, args.enableIndelQuals, args.PRESERVE_QSCORES_LESS_THAN); // Determine how the threads should be divided between CPU vs. IO. determineThreadAllocation(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index d865497cb..a5e38155f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -35,6 +35,7 @@ import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.DownsamplingMethod; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.gatk.samples.PedigreeValidationType; +import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalSetRule; @@ -190,14 +191,18 @@ public class GATKArgumentCollection { @Argument(fullName="useOriginalQualities", shortName = "OQ", doc = "If set, use the original base quality scores from the OQ tag when present instead of the standard scores", required=false) public Boolean useOriginalBaseQualities = false; + // -------------------------------------------------------------------------------------------------------------- + // + // BQSR arguments + // + // -------------------------------------------------------------------------------------------------------------- + /** - * After the header, data records occur one per line until the end of the file. The first several items on a line are the - * values of the individual covariates and will change depending on which covariates were specified at runtime. The last - * three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches, - * and the raw empirical quality score calculated by phred-scaling the mismatch rate. + * Enables on-the-fly recalibrate of base qualities. The covariates tables are produced by the BaseQualityScoreRecalibrator tool. + * Please be aware that one should only run recalibration with the covariates file created on the same input bam(s). */ - @Input(fullName="BQSR", shortName="BQSR", required=false, doc="Filename for the input covariates table recalibration .csv file which enables on the fly base quality score recalibration") - public File BQSR_RECAL_FILE = null; // BUGBUG: need a better argument name once we decide how BQSRs v1 and v2 will live in the code base simultaneously + @Input(fullName="BQSR", shortName="BQSR", required=false, doc="The input covariates table file which enables on-the-fly base quality score recalibration") + public File BQSR_RECAL_FILE = null; /** * Turns on the base quantization module. It requires a recalibration report (-BQSR). @@ -206,18 +211,34 @@ public class GATKArgumentCollection { * Any value greater than zero will be used to recalculate the quantization using that many levels. * Negative values mean that we should quantize using the recalibration report's quantization level. */ - @Argument(fullName="quantize_quals", shortName = "qq", doc = "Quantize quality scores to a given number of levels.", required=false) + @Argument(fullName="quantize_quals", shortName = "qq", doc = "Quantize quality scores to a given number of levels (with -BQSR)", required=false) public int quantizationLevels = 0; /** - * Turns off printing of the base insertion and base deletion tags when using the -BQSR argument. Only the base substitution qualities will be produced. + * Turns on printing of the base insertion and base deletion tags when using the -BQSR argument. By default, only the base substitution qualities will be produced. */ - @Argument(fullName="no_indel_quals", shortName = "NIQ", doc = "If true, inhibits printing of base insertion and base deletion tags.", required=false) - public boolean noIndelQuals = false; + @Argument(fullName="enable_indel_quals", shortName = "IQ", doc = "If true, enables printing of base insertion and base deletion tags (with -BQSR)", required=false) + public boolean enableIndelQuals = false; + + /** + * Do not modify quality scores less than this value but rather just write them out unmodified in the recalibrated BAM file. + * In general it's unsafe to change qualities scores below < 6, since base callers use these values to indicate random or bad bases. + * For example, Illumina writes Q2 bases when the machine has really gone wrong. This would be fine in and of itself, + * but when you select a subset of these reads based on their ability to align to the reference and their dinucleotide effect, + * your Q2 bin can be elevated to Q8 or Q10, leading to issues downstream. + */ + @Argument(fullName = "preserve_qscores_less_than", shortName = "preserveQ", doc = "Bases with quality scores less than this threshold won't be recalibrated (with -BQSR)", required = false) + public int PRESERVE_QSCORES_LESS_THAN = QualityUtils.MIN_USABLE_Q_SCORE; @Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false) public byte defaultBaseQualities = -1; + // -------------------------------------------------------------------------------------------------------------- + // + // Other utility arguments + // + // -------------------------------------------------------------------------------------------------------------- + @Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false) public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT; diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 76cfe0018..f91d5e127 100755 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -78,6 +78,12 @@ public class UserException extends ReviewedStingException { } } + public static class NotSupportedInGATKLite extends UserException { + public NotSupportedInGATKLite(String argument) { + super(String.format("Unfortunately, the argument %s is not supported in the Lite version of the GATK", argument)); + } + } + // todo -- fix up exception cause passing public static class MissingArgument extends CommandLineException { public MissingArgument(String arg, String message) { diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index 27226ba22..ca6ac5c9a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.collections.NestedHashMap; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.File; @@ -49,7 +50,8 @@ public class BaseRecalibration { private final RecalibrationTables recalibrationTables; private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation - private final boolean noIndelQuals; + private final boolean emitIndelQuals; + private final int preserveQLessThan; private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values. static { @@ -62,9 +64,15 @@ public class BaseRecalibration { * * @param RECAL_FILE a GATK Report file containing the recalibration information * @param quantizationLevels number of bins to quantize the quality scores - * @param noIndelQuals if true, do not emit base indel qualities + * @param emitIndelQuals if true, emit base indel qualities + * @param preserveQLessThan preserve quality scores less than this value + * @param isGATKLite is this being called from the full or Lite version of the GATK */ - public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean noIndelQuals) { + public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean emitIndelQuals, final int preserveQLessThan, final boolean isGATKLite) { + // check for unsupported access + if (isGATKLite && emitIndelQuals) + throw new UserException.NotSupportedInGATKLite("enable_indel_quals"); + RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE); recalibrationTables = recalibrationReport.getRecalibrationTables(); @@ -76,22 +84,8 @@ public class BaseRecalibration { quantizationInfo.quantizeQualityScores(quantizationLevels); readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length); - this.noIndelQuals = noIndelQuals; - } - - /** - * This constructor only exists for testing purposes. - * - * @param quantizationInfo the quantization info object - * @param recalibrationTables the map of key managers and recalibration tables - * @param requestedCovariates the list of requested covariates - */ - protected BaseRecalibration(final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) { - this.quantizationInfo = quantizationInfo; - this.recalibrationTables = recalibrationTables; - this.requestedCovariates = requestedCovariates; - readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length); - noIndelQuals = false; + this.emitIndelQuals = emitIndelQuals; + this.preserveQLessThan = preserveQLessThan; } /** @@ -104,7 +98,7 @@ public class BaseRecalibration { public void recalibrateRead(final GATKSAMRecord read) { RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings - if (noIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) { + if (!emitIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) { read.setBaseQualities(null, errorModel); continue; } @@ -117,7 +111,7 @@ public class BaseRecalibration { final byte originalQualityScore = quals[offset]; - if (originalQualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality) + if (originalQualityScore >= preserveQLessThan) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality) final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base quals[offset] = recalibratedQualityScore;