From c54e84e739f581b604d7d8b5e726458b5e2e7fda Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 15 Jun 2012 09:28:56 -0400 Subject: [PATCH] Ryan confirmed that we don't need separate arguments to control the context size for insertions and deletions, which allows us to cut down the expensive context calculations. --- .../gatk/walkers/bqsr/ContextCovariate.java | 19 ++++++++----------- .../bqsr/RecalibrationArgumentCollection.java | 18 +++++------------- .../walkers/bqsr/RecalibrationReport.java | 7 ++----- .../bqsr/ContextCovariateUnitTest.java | 4 ++-- .../walkers/bqsr/ReadCovariatesUnitTest.java | 4 ++-- .../bqsr/RecalibrationReportUnitTest.java | 2 +- 6 files changed, 20 insertions(+), 34 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java index 0efca66c0..fae2ac898 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java @@ -41,8 +41,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; public class ContextCovariate implements StandardCovariate { private int mismatchesContextSize; - private int insertionsContextSize; - private int deletionsContextSize; + private int indelsContextSize; private byte LOW_QUAL_TAIL; @@ -50,19 +49,16 @@ public class ContextCovariate implements StandardCovariate { @Override public void initialize(final RecalibrationArgumentCollection RAC) { mismatchesContextSize = RAC.MISMATCHES_CONTEXT_SIZE; - insertionsContextSize = RAC.INSERTIONS_CONTEXT_SIZE; - deletionsContextSize = RAC.DELETIONS_CONTEXT_SIZE; + indelsContextSize = RAC.INDELS_CONTEXT_SIZE; if (mismatchesContextSize > MAX_DNA_CONTEXT) throw new UserException.BadArgumentValue("mismatches_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, mismatchesContextSize)); - 
if (insertionsContextSize > MAX_DNA_CONTEXT) - throw new UserException.BadArgumentValue("insertions_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, insertionsContextSize)); - if (deletionsContextSize > MAX_DNA_CONTEXT) - throw new UserException.BadArgumentValue("deletions_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, deletionsContextSize)); + if (indelsContextSize > MAX_DNA_CONTEXT) + throw new UserException.BadArgumentValue("indels_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, indelsContextSize)); LOW_QUAL_TAIL = RAC.LOW_QUAL_TAIL; - if (mismatchesContextSize <= 0 || insertionsContextSize <= 0 || deletionsContextSize <= 0) - throw new UserException(String.format("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Insertions: %d Deletions:%d", mismatchesContextSize, insertionsContextSize, deletionsContextSize)); + if (mismatchesContextSize <= 0 || indelsContextSize <= 0) + throw new UserException(String.format("Context size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Indels: %d", mismatchesContextSize, indelsContextSize)); } @Override @@ -77,7 +73,8 @@ public class ContextCovariate implements StandardCovariate { final int readLength = clippedRead.getReadLength(); for (int i = 0; i < readLength; i++) { - values.addCovariate(contextWith(bases, i, mismatchesContextSize), contextWith(bases, i, insertionsContextSize), contextWith(bases, i, deletionsContextSize), (negativeStrand ? readLength - i - 1 : i)); + final long indelKey = contextWith(bases, i, indelsContextSize); + values.addCovariate(contextWith(bases, i, mismatchesContextSize), indelKey, indelKey, (negativeStrand ? 
readLength - i - 1 : i)); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index 340620c2f..22b26317b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -114,16 +114,10 @@ public class RecalibrationArgumentCollection { public int MISMATCHES_CONTEXT_SIZE = 2; /** - * The context covariate will use a context of this size to calculate it's covariate value for base insertions + * The context covariate will use a context of this size to calculate its covariate value for base insertions and deletions */ - @Argument(fullName = "insertions_context_size", shortName = "ics", doc = "size of the k-mer context to be used for base insertions", required = false) - public int INSERTIONS_CONTEXT_SIZE = 8; - - /** - * The context covariate will use a context of this size to calculate it's covariate value for base deletions - */ - @Argument(fullName = "deletions_context_size", shortName = "dcs", doc = "size of the k-mer context to be used for base deletions", required = false) - public int DELETIONS_CONTEXT_SIZE = 8; + @Argument(fullName = "indels_context_size", shortName = "ics", doc = "size of the k-mer context to be used for base insertions and deletions", required = false) + public int INDELS_CONTEXT_SIZE = 8; /** * A default base qualities to use as a prior (reported quality) in the mismatch covariate model. This value will replace all base qualities in the read for this default value. 
Negative value turns it off (default is off) @@ -188,10 +182,8 @@ public class RecalibrationArgumentCollection { argumentsTable.set("solid_nocall_strategy", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY); argumentsTable.addRowID("mismatches_context_size", true); argumentsTable.set("mismatches_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE); - argumentsTable.addRowID("insertions_context_size", true); - argumentsTable.set("insertions_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_CONTEXT_SIZE); - argumentsTable.addRowID("deletions_context_size", true); - argumentsTable.set("deletions_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, DELETIONS_CONTEXT_SIZE); + argumentsTable.addRowID("indels_context_size", true); + argumentsTable.set("indels_context_size", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE); argumentsTable.addRowID("mismatches_default_quality", true); argumentsTable.set("mismatches_default_quality", RecalDataManager.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY); argumentsTable.addRowID("insertions_default_quality", true); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java index ed1864bba..5af15c01c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java @@ -266,11 +266,8 @@ public class RecalibrationReport { else if (argument.equals("mismatches_context_size")) RAC.MISMATCHES_CONTEXT_SIZE = Integer.parseInt((String) value); - else if (argument.equals("insertions_context_size")) - RAC.INSERTIONS_CONTEXT_SIZE = Integer.parseInt((String) value); - - else if (argument.equals("deletions_context_size")) - RAC.DELETIONS_CONTEXT_SIZE = Integer.parseInt((String) value); + else if 
(argument.equals("indels_context_size")) + RAC.INDELS_CONTEXT_SIZE = Integer.parseInt((String) value); else if (argument.equals("mismatches_default_quality")) RAC.MISMATCHES_DEFAULT_QUALITY = Byte.parseByte((String) value); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java index e73d40603..ee5395454 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariateUnitTest.java @@ -32,8 +32,8 @@ public class ContextCovariateUnitTest { covariate.recordValues(read, readCovariates); verifyCovariateArray(readCovariates.getMismatchesKeySet(), RAC.MISMATCHES_CONTEXT_SIZE, clippedRead, covariate); - verifyCovariateArray(readCovariates.getInsertionsKeySet(), RAC.INSERTIONS_CONTEXT_SIZE, clippedRead, covariate); - verifyCovariateArray(readCovariates.getDeletionsKeySet(), RAC.DELETIONS_CONTEXT_SIZE, clippedRead, covariate); + verifyCovariateArray(readCovariates.getInsertionsKeySet(), RAC.INDELS_CONTEXT_SIZE, clippedRead, covariate); + verifyCovariateArray(readCovariates.getDeletionsKeySet(), RAC.INDELS_CONTEXT_SIZE, clippedRead, covariate); } public static void verifyCovariateArray(long[][] values, int contextSize, GATKSAMRecord read, Covariate contextCovariate) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariatesUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariatesUnitTest.java index 8de416e29..37994cf12 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariatesUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/ReadCovariatesUnitTest.java @@ -61,8 +61,8 @@ public class ReadCovariatesUnitTest { // check context Assert.assertEquals(coCov.formatKey(rc.getMismatchesKeySet(i)[2]), 
ContextCovariateUnitTest.expectedContext(read, i, RAC.MISMATCHES_CONTEXT_SIZE)); - Assert.assertEquals(coCov.formatKey(rc.getInsertionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INSERTIONS_CONTEXT_SIZE)); - Assert.assertEquals(coCov.formatKey(rc.getDeletionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.DELETIONS_CONTEXT_SIZE)); + Assert.assertEquals(coCov.formatKey(rc.getInsertionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE)); + Assert.assertEquals(coCov.formatKey(rc.getDeletionsKeySet(i)[2]), ContextCovariateUnitTest.expectedContext(read, i, RAC.INDELS_CONTEXT_SIZE)); // check cycle Assert.assertEquals(cyCov.formatKey(rc.getMismatchesKeySet(i)[3]), "" + (i+1)); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java index a2dbf0241..e5fde0efc 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReportUnitTest.java @@ -76,7 +76,7 @@ public class RecalibrationReportUnitTest { read.setBaseQualities(readQuals); - final int expectedKeys = expectedNumberOfKeys(4, length, RAC.INSERTIONS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE); + final int expectedKeys = expectedNumberOfKeys(4, length, RAC.INDELS_CONTEXT_SIZE, RAC.MISMATCHES_CONTEXT_SIZE); int nKeys = 0; // keep track of how many keys were produced final ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates); for (int offset = 0; offset < length; offset++) {