From 68d0211fa1cf8e0f9803e64d60672b800716a20c Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 18 Apr 2012 13:02:41 -0400 Subject: [PATCH] Improved BQSR plotting and added some new parameters * Refactored CycleCovariate to be a fragment covariate instead of a per-read covariate * Refactored the CycleCovariateUnitTest to test the pairing information * Updated BQSR Integration tests accordingly * Made quantization levels parameter not hidden anymore * Added hidden option to keep intermediate plotting files for debug purposes (by default they are automatically deleted) * Added hidden option not to generate the plots automatically (important for scatter/gathering) --- .../gatk/walkers/bqsr/CycleCovariate.java | 14 +++++++------- .../bqsr/RecalibrationArgumentCollection.java | 18 ++++++++++++++++-- .../gatk/walkers/bqsr/RecalibrationReport.java | 6 ++++++ .../walkers/bqsr/CycleCovariateUnitTest.java | 10 +++++++++- 4 files changed, 38 insertions(+), 10 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java index 7bc6cd754..54a90a959 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariate.java @@ -66,18 +66,18 @@ public class CycleCovariate implements StandardCovariate { // Discrete cycle platforms if (DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform)) { - final short init; + final short readOrderFactor = read.getReadPairedFlag() && read.getSecondOfPairFlag() ? 
(short) -1 : 1; final short increment; - if (!read.getReadNegativeStrandFlag()) { - init = 1; - increment = 1; + short cycle; + if (read.getReadNegativeStrandFlag()) { + cycle = (short) (read.getReadLength() * readOrderFactor); + increment = (short) (-1 * readOrderFactor); } else { - init = (short) read.getReadLength(); - increment = -1; + cycle = readOrderFactor; + increment = readOrderFactor; } - short cycle = init; for (int i = 0; i < read.getReadLength(); i++) { cycles[i] = BitSetUtils.bitSetFrom(cycle); cycle += increment; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index 4a695ecb6..b5768eedd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -143,9 +143,18 @@ public class RecalibrationArgumentCollection { @Argument(fullName = "deletions_default_quality", shortName = "ddq", doc = "default quality for the base deletions covariate", required = false) public byte DELETIONS_DEFAULT_QUALITY = 45; + /** + * Reads with low quality bases on either tail (beginning or end) will not be considered in the context. This parameter defines the quality below which (inclusive) a tail is considered low quality + */ @Argument(fullName = "low_quality_tail", shortName = "lqt", doc = "minimum quality for the bases in the tail of the reads to be considered", required = false) public byte LOW_QUAL_TAIL = 2; + /** + * BQSR generates a quantization table for quick quantization later by subsequent tools. BQSR does not quantize the base qualities, this is done by the engine with the -qq or -BQSR options. + * This parameter tells BQSR the number of levels of quantization to use to build the quantization table. 
+ */ + @Argument(fullName = "quantizing_levels", shortName = "ql", required = false, doc = "number of distinct quality scores in the quantized output") + public int QUANTIZING_LEVELS = 16; @Hidden @@ -155,8 +164,11 @@ public class RecalibrationArgumentCollection { @Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.") public String FORCE_PLATFORM = null; @Hidden - @Argument(fullName = "quantizing_levels", shortName = "ql", required = false, doc = "number of distinct quality scores in the quantized output") - public int QUANTIZING_LEVELS = 16; + @Argument(fullName = "keep_intermediate_files", shortName = "k", required = false, doc ="does not remove the temporary csv file created to generate the plots") + public boolean KEEP_INTERMEDIATE_FILES = false; + @Hidden + @Argument(fullName = "no_plots", shortName = "np", required = false, doc = "does not generate any plots -- useful for queue scatter/gathering") + public boolean NO_PLOTS = false; public GATKReportTable generateReportTable() { GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run"); @@ -176,6 +188,8 @@ public class RecalibrationArgumentCollection { argumentsTable.set("default_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM); argumentsTable.set("force_platform", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM); argumentsTable.set("quantizing_levels", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS); + argumentsTable.set("keep_intermediate_files", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, KEEP_INTERMEDIATE_FILES); + argumentsTable.set("no_plots", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, NO_PLOTS); return argumentsTable; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java index 19c04361b..2962c4674 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java @@ -279,6 +279,12 @@ public class RecalibrationReport { else if (primaryKey.equals("quantizing_levels")) RAC.QUANTIZING_LEVELS = Integer.parseInt((String) value); + + else if (primaryKey.equals("keep_intermediate_files")) + RAC.KEEP_INTERMEDIATE_FILES = Boolean.parseBoolean((String) value); + + else if (primaryKey.equals("no_plots")) + RAC.NO_PLOTS = Boolean.parseBoolean((String) value); } return RAC; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java index d80cddd3e..cec541a97 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/CycleCovariateUnitTest.java @@ -26,8 +26,9 @@ public class CycleCovariateUnitTest { @Test(enabled = true) public void testSimpleCycles() { - short readLength = 10; + short readLength = 10; GATKSAMRecord read = ReadUtils.createRandomRead(readLength); + read.setReadPairedFlag(true); read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID")); read.getReadGroup().setPlatform("illumina"); @@ -38,6 +39,13 @@ public class CycleCovariateUnitTest { values = covariate.getValues(read); verifyCovariateArray(values.getMismatches(), readLength, (short) -1); + read.setSecondOfPairFlag(true); + values = covariate.getValues(read); + verifyCovariateArray(values.getMismatches(), (short) -readLength, (short) 1); + + read.setReadNegativeStrandFlag(false); + values = covariate.getValues(read); + verifyCovariateArray(values.getMismatches(), (short) -1, (short) -1); } private void verifyCovariateArray(BitSet[] values, short 
init, short increment) {