diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 68680dd10..e19a3c613 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -197,7 +197,7 @@ public class GenomeAnalysisEngine { private BaseRecalibration baseRecalibration = null; public BaseRecalibration getBaseRecalibration() { return baseRecalibration; } public boolean hasBaseRecalibration() { return baseRecalibration != null; } - public void setBaseRecalibration(File recalFile, int quantizationLevels) { baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels); } + public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean noIndelQuals) { baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, noIndelQuals); } /** * Actually run the GATK with the specified walker. @@ -227,7 +227,7 @@ public class GenomeAnalysisEngine { // if the use specified an input BQSR recalibration table then enable on the fly recalibration if (this.getArguments().BQSR_RECAL_FILE != null) - setBaseRecalibration(this.getArguments().BQSR_RECAL_FILE, this.getArguments().quantizationLevels); + setBaseRecalibration(this.getArguments().BQSR_RECAL_FILE, this.getArguments().quantizationLevels, this.getArguments().noIndelQuals); // Determine how the threads should be divided between CPU vs. IO. determineThreadAllocation(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 13c737a2e..84e89e8ec 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -209,6 +209,12 @@ public class GATKArgumentCollection { @Argument(fullName="quantize_quals", shortName = "qq", doc = "Quantize quality scores to a given number of levels.", required=false) public int quantizationLevels = -1; + /** + * Turns off printing of the base insertion and base deletion tags when using the -BQSR argument. Only the base substitution qualities will be produced. + */ + @Argument(fullName="no_indel_quals", shortName = "NIQ", doc = "If true, inhibits printing of base insertion and base deletion tags.", required=false) + public boolean noIndelQuals = false; + @Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false) public byte defaultBaseQualities = -1; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 0e8380633..0fa4234b3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -664,8 +664,12 @@ public class SAMDataSource { IndexedFastaSequenceFile refReader, BaseRecalibration bqsrApplier, byte defaultBaseQualities) { - // **** NOTE: ALL FILTERING SHOULD BE DONE BEFORE ANY ITERATORS THAT MODIFY THE READS! **** - // (otherwise we will process something that we may end up throwing away) + + // *********************************************************************************** // + // * NOTE: ALL FILTERING SHOULD BE DONE BEFORE ANY ITERATORS THAT MODIFY THE READS! * // + // * (otherwise we will process something that we may end up throwing away) * // + // *********************************************************************************** // + if (downsamplingFraction != null) wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction); diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index 713c601fa..27226ba22 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -49,6 +49,8 @@ public class BaseRecalibration { private final RecalibrationTables recalibrationTables; private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation + private final boolean noIndelQuals; + private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values. static { for (int i = 0; i < EventType.values().length; i++) @@ -58,10 +60,11 @@ public class BaseRecalibration { /** * Constructor using a GATK Report file * - * @param RECAL_FILE a GATK Report file containing the recalibration information + * @param RECAL_FILE a GATK Report file containing the recalibration information * @param quantizationLevels number of bins to quantize the quality scores + * @param noIndelQuals if true, do not emit base indel qualities */ - public BaseRecalibration(final File RECAL_FILE, int quantizationLevels) { + public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean noIndelQuals) { RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE); recalibrationTables = recalibrationReport.getRecalibrationTables(); @@ -73,6 +76,7 @@ public class BaseRecalibration { quantizationInfo.quantizeQualityScores(quantizationLevels); readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length); + this.noIndelQuals = noIndelQuals; } /** @@ -87,6 +91,7 @@ public class BaseRecalibration { this.recalibrationTables = recalibrationTables; this.requestedCovariates = requestedCovariates; readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length); + noIndelQuals = false; } /** @@ -99,6 +104,11 @@ public class BaseRecalibration { public void recalibrateRead(final GATKSAMRecord read) { RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings + if (noIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) { + read.setBaseQualities(null, errorModel); + continue; + } + final byte[] quals = read.getBaseQualities(errorModel); final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index a925c7577..659615cf4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -173,10 +173,10 @@ public class GATKSAMRecord extends BAMRecord { setBaseQualities(quals); break; case BASE_INSERTION: - setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, SAMUtils.phredToFastq(quals) ); + setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, quals == null ? null : SAMUtils.phredToFastq(quals) ); break; case BASE_DELETION: - setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, SAMUtils.phredToFastq(quals) ); + setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, quals == null ? null : SAMUtils.phredToFastq(quals) ); break; default: throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );