From 17efbbf8b18a758439192a1e606b32a277e2ae6a Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 3 Jul 2012 16:37:21 -0400 Subject: [PATCH 1/4] Fixed ReadClipperUnitTest The behavior of the clipping on weird cigar strings such as 1I1S1H and 9S56H has changed, and the test has to change accordingly. --- .../utils/clipping/ReadClipperTestUtils.java | 11 ++-- .../utils/clipping/ReadClipperUnitTest.java | 53 ++++++++++--------- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java index 16b141bc3..baa2f6218 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java @@ -84,14 +84,14 @@ public class ReadClipperTestUtils { } private static boolean isCigarValid(Cigar cigar) { - if (cigar.isValid(null, -1) == null) { // This should take care of most invalid Cigar Strings (picard's "exhaustive" implementation) + if (cigar.isValid(null, -1) == null) { // This should take care of most invalid Cigar Strings (picard's "exhaustive" implementation) - Stack cigarElementStack = new Stack(); // Stack to invert cigar string to find ending operator + Stack cigarElementStack = new Stack(); // Stack to invert cigar string to find ending operator CigarOperator startingOp = null; CigarOperator endingOp = null; // check if it doesn't start with deletions - boolean readHasStarted = false; // search the list of elements for the starting operator + boolean readHasStarted = false; // search the list of elements for the starting operator for (CigarElement cigarElement : cigar.getCigarElements()) { if (!readHasStarted) { if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP) { @@ -102,19 +102,16 @@ public class ReadClipperTestUtils { cigarElementStack.push(cigarElement); } - readHasStarted = false; // search the inverted list of elements (stack) for the stopping operator while (!cigarElementStack.empty()) { CigarElement cigarElement = cigarElementStack.pop(); if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP) { - readHasStarted = true; endingOp = cigarElement.getOperator(); break; } } -// if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION && startingOp != CigarOperator.INSERTION && endingOp != CigarOperator.INSERTION) if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION) - return true; // we don't accept reads starting or ending in deletions (add any other constraint here) + return true; // we don't accept reads starting or ending in deletions (add any other constraint here) } return false; diff --git a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java index 44c9ab255..a819e41c7 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java @@ -151,51 +151,40 @@ public class ReadClipperUnitTest extends BaseTest { final byte LOW_QUAL = 2; final byte HIGH_QUAL = 30; - // create a read for every cigar permutation + /** create a read for every cigar permutation */ for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); int readLength = read.getReadLength(); byte[] quals = new byte[readLength]; for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) { - Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the left tail + + /** create a read with nLowQualBases in the left tail */ + Utils.fillArrayWithByte(quals, HIGH_QUAL); for (int addLeft = 0; addLeft < nLowQualBases; addLeft++) quals[addLeft] = LOW_QUAL; read.setBaseQualities(quals); GATKSAMRecord clipLeft = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL); + checkClippedReadsForLowQualEnds(read, clipLeft, LOW_QUAL, nLowQualBases); - assertUnclippedLimits(read, clipLeft); // Make sure limits haven't changed - assertNoLowQualBases(clipLeft, LOW_QUAL); // Make sure the low qualities are gone - Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped - String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString())); - - - Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the right tail + /** create a read with nLowQualBases in the right tail */ + Utils.fillArrayWithByte(quals, HIGH_QUAL); for (int addRight = 0; addRight < nLowQualBases; addRight++) quals[readLength - addRight - 1] = LOW_QUAL; read.setBaseQualities(quals); GATKSAMRecord clipRight = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL); + checkClippedReadsForLowQualEnds(read, clipRight, LOW_QUAL, nLowQualBases); -// System.out.println(String.format("Debug [%d]: %s -> %s / %s", nLowQualBases, cigar.toString(), clipLeft.getCigarString(), clipRight.getCigarString())); - - assertUnclippedLimits(read, clipRight); // Make sure limits haven't changed - assertNoLowQualBases(clipRight, LOW_QUAL); // Make sure the low qualities are gone - Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped - String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString())); - + /** create a read with nLowQualBases on both tails */ if (nLowQualBases <= readLength / 2) { - Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases on both tails + Utils.fillArrayWithByte(quals, HIGH_QUAL); for (int addBoth = 0; addBoth < nLowQualBases; addBoth++) { quals[addBoth] = LOW_QUAL; quals[readLength - addBoth - 1] = LOW_QUAL; } read.setBaseQualities(quals); GATKSAMRecord clipBoth = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL); - - assertUnclippedLimits(read, clipBoth); // Make sure limits haven't changed - assertNoLowQualBases(clipBoth, LOW_QUAL); // Make sure the low qualities are gone - Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped - String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - (2 * nLowQualBases), read.getCigarString(), clipBoth.getCigarString())); + checkClippedReadsForLowQualEnds(read, clipBoth, LOW_QUAL, 2*nLowQualBases); } } } @@ -209,8 +198,8 @@ public class ReadClipperUnitTest extends BaseTest { CigarCounter original = new CigarCounter(read); CigarCounter clipped = new CigarCounter(clippedRead); - assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed - original.assertHardClippingSoftClips(clipped); // Make sure we have only clipped SOFT_CLIPS + assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed + original.assertHardClippingSoftClips(clipped); // Make sure we have only clipped SOFT_CLIPS } } @@ -286,11 +275,17 @@ public class ReadClipperUnitTest extends BaseTest { } } + private void checkClippedReadsForLowQualEnds(GATKSAMRecord read, GATKSAMRecord clippedRead, byte lowQual, int nLowQualBases) { + assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed + assertNoLowQualBases(clippedRead, lowQual); // Make sure the low qualities are gone + assertNumberOfBases(read, clippedRead, nLowQualBases); // Make sure only low quality bases were clipped + } + /** * Asserts that clipping doesn't change the getUnclippedStart / getUnclippedEnd * - * @param original - * @param clipped + * @param original original read + * @param clipped clipped read */ private void assertUnclippedLimits(GATKSAMRecord original, GATKSAMRecord clipped) { if (ReadClipperTestUtils.readHasNonClippedBases(clipped)) { @@ -299,6 +294,12 @@ public class ReadClipperUnitTest extends BaseTest { } } + private void assertNumberOfBases(GATKSAMRecord read, GATKSAMRecord clipLeft, int nLowQualBases) { + if (read.getCigarString().contains("M")) + Assert.assertEquals(clipLeft.getReadLength(), read.getReadLength() - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), read.getReadLength() - nLowQualBases, read.getCigarString(), clipLeft.getCigarString())); + } + + private boolean startsWithInsertion(Cigar cigar) { return leadingCigarElementLength(cigar, CigarOperator.INSERTION) > 0; } From 7d30558e6f71e9c28dd2f05e1a406780ede16be8 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 3 Jul 2012 23:47:01 -0400 Subject: [PATCH 2/4] Only 'pad' the cycle covariate for indels, not substitutions --- .../sting/gatk/walkers/bqsr/CycleCovariate.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java index 6188f2bcd..e3b7f2637 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java @@ -48,6 +48,7 @@ import java.util.EnumSet; public class CycleCovariate implements StandardCovariate { private static final int MAXIMUM_CYCLE_VALUE = 1000; + private static final int CUSHION_FOR_INDELS = 4; private static final EnumSet DISCRETE_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.ILLUMINA, NGSPlatform.SOLID, NGSPlatform.PACBIO, NGSPlatform.COMPLETE_GENOMICS); private static final EnumSet FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT); @@ -79,11 +80,11 @@ public class CycleCovariate implements StandardCovariate { increment = readOrderFactor; } - final int CUSHION = 4; - final int MAX_CYCLE = readLength - CUSHION - 1; + final int MAX_CYCLE_FOR_INDELS = readLength - CUSHION_FOR_INDELS - 1; for (int i = 0; i < readLength; i++) { - final int key = (iMAX_CYCLE) ? -1 : keyFromCycle(cycle); - values.addCovariate(key, key, key, i); + final int substitutionKey = keyFromCycle(cycle); + final int indelKey = (i < CUSHION_FOR_INDELS || i > MAX_CYCLE_FOR_INDELS) ? -1 : substitutionKey; + values.addCovariate(substitutionKey, indelKey, indelKey, i); cycle += increment; } } From 33306d2e201288b87c6ecf63c9dde84a2243bd96 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 4 Jul 2012 00:21:21 -0400 Subject: [PATCH 3/4] Changing the logic of the -standard argument; the way it stands currently one can never turn off the cycle or context covariates. Now they are on by default and users must opt out of them to turn them off. --- .../gatk/walkers/bqsr/RecalDataManager.java | 4 ++-- .../bqsr/RecalibrationArgumentCollection.java | 17 ++++++++++------- .../gatk/walkers/bqsr/RecalibrationReport.java | 2 +- public/testdata/exampleGRP.grp | 2 +- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java index 80e8b574a..876ce585a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java @@ -170,7 +170,7 @@ public class RecalDataManager { final ArrayList requiredCovariates = addRequiredCovariatesToList(requiredClasses); // add the required covariates ArrayList optionalCovariates = new ArrayList(); - if (argumentCollection.USE_STANDARD_COVARIATES) + if (!argumentCollection.DO_NOT_USE_STANDARD_COVARIATES) optionalCovariates = addStandardCovariatesToList(standardClasses); // add the standard covariates if -standard was specified by the user if (argumentCollection.COVARIATES != null) { // parse the -cov arguments that were provided, skipping over the ones already specified @@ -180,7 +180,7 @@ public class RecalDataManager { if (requestedCovariateString.equalsIgnoreCase(covClass.getSimpleName())) { // -cov argument matches the class name for an implementing class foundClass = true; if (!requiredClasses.contains(covClass) && - (!argumentCollection.USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) { + (argumentCollection.DO_NOT_USE_STANDARD_COVARIATES || !standardClasses.contains(covClass))) { try { final Covariate covariate = covClass.newInstance(); // now that we've found a matching class, try to instantiate it optionalCovariates.add(covariate); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index 22b26317b..2688fe655 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -71,16 +71,19 @@ public class RecalibrationArgumentCollection { public boolean LIST_ONLY = false; /** - * Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you. See the list of covariates with -list. + * Note that the ReadGroup and QualityScore covariates are required and do not need to be specified. + * Also, unless --no_standard_covs is specified, the Cycle and Context covariates are standard and are included by default. + * Use the --list argument to see the available covariates. */ - @Argument(fullName = "covariate", shortName = "cov", doc = "Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required = false) + @Argument(fullName = "covariate", shortName = "cov", doc = "One or more covariates to be used in the recalibration. Can be specified multiple times", required = false) public String[] COVARIATES = null; /* - * Use the standard set of covariates in addition to the ones listed using the -cov argument + * The Cycle and Context covariates are standard and are included by default unless this argument is provided. + * Note that the ReadGroup and QualityScore covariates are required and cannot be excluded. */ - @Argument(fullName = "standard_covs", shortName = "standard", doc = "Use the standard set of covariates in addition to the ones listed using the -cov argument", required = false) - public boolean USE_STANDARD_COVARIATES = true; + @Argument(fullName = "no_standard_covs", shortName = "noStandard", doc = "Do not use the standard set of covariates, but rather just the ones listed using the -cov argument", required = false) + public boolean DO_NOT_USE_STANDARD_COVARIATES = false; ///////////////////////////// // Debugging-only Arguments @@ -172,8 +175,8 @@ public class RecalibrationArgumentCollection { argumentsTable.addColumn(RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME); argumentsTable.addRowID("covariate", true); argumentsTable.set("covariate", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, (COVARIATES == null) ? "null" : Utils.join(",", COVARIATES)); - argumentsTable.addRowID("standard_covs", true); - argumentsTable.set("standard_covs", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, USE_STANDARD_COVARIATES); + argumentsTable.addRowID("no_standard_covs", true); + argumentsTable.set("no_standard_covs", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES); argumentsTable.addRowID("run_without_dbsnp", true); argumentsTable.set("run_without_dbsnp", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP); argumentsTable.addRowID("solid_recal_mode", true); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java index c49ff8b47..9c6460320 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java @@ -247,7 +247,7 @@ public class RecalibrationReport { RAC.COVARIATES = value.toString().split(","); else if (argument.equals("standard_covs")) - RAC.USE_STANDARD_COVARIATES = Boolean.parseBoolean((String) value); + RAC.DO_NOT_USE_STANDARD_COVARIATES = Boolean.parseBoolean((String) value); else if (argument.equals("solid_recal_mode")) RAC.SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.recalModeFromString((String) value); diff --git a/public/testdata/exampleGRP.grp b/public/testdata/exampleGRP.grp index 619d6b212..2ec55ec57 100644 --- a/public/testdata/exampleGRP.grp +++ b/public/testdata/exampleGRP.grp @@ -11,11 +11,11 @@ insertions_default_quality 45 low_quality_tail 2 mismatches_context_size 2 mismatches_default_quality -1 +no_standard_covs false quantizing_levels 16 run_without_dbsnp false solid_nocall_strategy THROW_EXCEPTION solid_recal_mode SET_Q_ZERO -standard_covs true #:GATKTable:3:94:::; #:GATKTable:Quantized:Quality quantization map From dd571d9aa065461143b26b6d87e8709bff97d908 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 4 Jul 2012 01:22:20 -0400 Subject: [PATCH 4/4] Added a --no_indel_quals argument that when used with -BQSR inhibits the writing of base insertion and base deletion quality tags. --- .../sting/gatk/GenomeAnalysisEngine.java | 4 ++-- .../gatk/arguments/GATKArgumentCollection.java | 6 ++++++ .../gatk/datasources/reads/SAMDataSource.java | 8 ++++++-- .../utils/recalibration/BaseRecalibration.java | 14 ++++++++++++-- .../sting/utils/sam/GATKSAMRecord.java | 4 ++-- 5 files changed, 28 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 68680dd10..e19a3c613 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -197,7 +197,7 @@ public class GenomeAnalysisEngine { private BaseRecalibration baseRecalibration = null; public BaseRecalibration getBaseRecalibration() { return baseRecalibration; } public boolean hasBaseRecalibration() { return baseRecalibration != null; } - public void setBaseRecalibration(File recalFile, int quantizationLevels) { baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels); } + public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean noIndelQuals) { baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, noIndelQuals); } /** * Actually run the GATK with the specified walker. @@ -227,7 +227,7 @@ public class GenomeAnalysisEngine { // if the use specified an input BQSR recalibration table then enable on the fly recalibration if (this.getArguments().BQSR_RECAL_FILE != null) - setBaseRecalibration(this.getArguments().BQSR_RECAL_FILE, this.getArguments().quantizationLevels); + setBaseRecalibration(this.getArguments().BQSR_RECAL_FILE, this.getArguments().quantizationLevels, this.getArguments().noIndelQuals); // Determine how the threads should be divided between CPU vs. IO. determineThreadAllocation(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 13c737a2e..84e89e8ec 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -209,6 +209,12 @@ public class GATKArgumentCollection { @Argument(fullName="quantize_quals", shortName = "qq", doc = "Quantize quality scores to a given number of levels.", required=false) public int quantizationLevels = -1; + /** + * Turns off printing of the base insertion and base deletion tags when using the -BQSR argument. Only the base substitution qualities will be produced. + */ + @Argument(fullName="no_indel_quals", shortName = "NIQ", doc = "If true, inhibits printing of base insertion and base deletion tags.", required=false) + public boolean noIndelQuals = false; + @Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false) public byte defaultBaseQualities = -1; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 0e8380633..0fa4234b3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -664,8 +664,12 @@ public class SAMDataSource { IndexedFastaSequenceFile refReader, BaseRecalibration bqsrApplier, byte defaultBaseQualities) { - // **** NOTE: ALL FILTERING SHOULD BE DONE BEFORE ANY ITERATORS THAT MODIFY THE READS! **** - // (otherwise we will process something that we may end up throwing away) + + // *********************************************************************************** // + // * NOTE: ALL FILTERING SHOULD BE DONE BEFORE ANY ITERATORS THAT MODIFY THE READS! * // + // * (otherwise we will process something that we may end up throwing away) * // + // *********************************************************************************** // + if (downsamplingFraction != null) wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction); diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index 713c601fa..27226ba22 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -49,6 +49,8 @@ public class BaseRecalibration { private final RecalibrationTables recalibrationTables; private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation + private final boolean noIndelQuals; + private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values. static { for (int i = 0; i < EventType.values().length; i++) @@ -58,10 +60,11 @@ public class BaseRecalibration { /** * Constructor using a GATK Report file * - * @param RECAL_FILE a GATK Report file containing the recalibration information + * @param RECAL_FILE a GATK Report file containing the recalibration information * @param quantizationLevels number of bins to quantize the quality scores + * @param noIndelQuals if true, do not emit base indel qualities */ - public BaseRecalibration(final File RECAL_FILE, int quantizationLevels) { + public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean noIndelQuals) { RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE); recalibrationTables = recalibrationReport.getRecalibrationTables(); @@ -73,6 +76,7 @@ public class BaseRecalibration { quantizationInfo.quantizeQualityScores(quantizationLevels); readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length); + this.noIndelQuals = noIndelQuals; } /** @@ -87,6 +91,7 @@ public class BaseRecalibration { this.recalibrationTables = recalibrationTables; this.requestedCovariates = requestedCovariates; readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length); + noIndelQuals = false; } /** @@ -99,6 +104,11 @@ public class BaseRecalibration { public void recalibrateRead(final GATKSAMRecord read) { RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings + if (noIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) { + read.setBaseQualities(null, errorModel); + continue; + } + final byte[] quals = read.getBaseQualities(errorModel); final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index a925c7577..659615cf4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -173,10 +173,10 @@ public class GATKSAMRecord extends BAMRecord { setBaseQualities(quals); break; case BASE_INSERTION: - setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, SAMUtils.phredToFastq(quals) ); + setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, quals == null ? null : SAMUtils.phredToFastq(quals) ); break; case BASE_DELETION: - setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, SAMUtils.phredToFastq(quals) ); + setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, quals == null ? null : SAMUtils.phredToFastq(quals) ); break; default: throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel );