From 20c1fbaf0f7b71a1ee16e463e0f464bfdd8c0158 Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Tue, 6 Mar 2012 14:22:45 -0500 Subject: [PATCH 01/26] Fixing a merge (turning off downsampling on DoC) --- .../sting/gatk/walkers/coverage/DepthOfCoverageWalker.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java index 94f9eb6c5..833dce932 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java @@ -29,6 +29,7 @@ import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.commandline.Advanced; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -115,6 +116,7 @@ import java.util.*; // todo -- formatting --> do something special for end bins in getQuantile(int[] foo), this gets mushed into the end+-1 bins for now @By(DataSource.REFERENCE) @PartitionBy(PartitionType.NONE) +@Downsample(by= DownsampleType.NONE, toCoverage=Integer.MAX_VALUE) public class DepthOfCoverageWalker extends LocusWalker>, CoveragePartitioner> implements TreeReducible { @Output @Multiplex(value=DoCOutputMultiplexer.class,arguments={"partitionTypes","refSeqGeneList","omitDepthOutput","omitIntervals","omitSampleSummary","omitLocusTable"}) From 811f871f7875d302f1876d97ed3247974afdd00b Mon Sep 17 00:00:00 2001 From: David Roazen Date: Tue, 6 Mar 2012 15:25:19 -0500 Subject: [PATCH 02/26] Do not fail tests that require the GATK private key if the user does not have permission to read it Several of the unit tests for the new key authorization feature require read access to the GATK master private key file. Since this file is only readable by members of the group gsagit, this makes it hard for people outside the group to run the test suite. Now, we skip tests that require the master private key if the private key exists (since not existing would be a true error) but is not readable by the user running the test suite Bamboo, of course, will always be able to run these tests. --- .../sting/utils/crypt/CryptUtilsUnitTest.java | 21 +++++++++++++++++++ .../sting/utils/crypt/GATKKeyUnitTest.java | 19 +++++++++++++++-- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/crypt/CryptUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/crypt/CryptUtilsUnitTest.java index eae4486c6..f5cfea148 100644 --- a/public/java/test/org/broadinstitute/sting/utils/crypt/CryptUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/crypt/CryptUtilsUnitTest.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.crypt; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.SkipException; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import org.testng.Assert; @@ -64,11 +65,21 @@ public class CryptUtilsUnitTest extends BaseTest { @Test public void testGATKMasterKeyPairMutualDecryption() { + if ( gatkPrivateKeyExistsButReadPermissionDenied() ) { + throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key", + "testGATKMasterKeyPairMutualDecryption")); + } + Assert.assertTrue(CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKMasterPublicKey())); } @Test public void testGATKMasterPrivateKeyWithDistributedPublicKeyMutualDecryption() { + if ( gatkPrivateKeyExistsButReadPermissionDenied() ) { + throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key", + "testGATKMasterPrivateKeyWithDistributedPublicKeyMutualDecryption")); + } + Assert.assertTrue(CryptUtils.keysDecryptEachOther(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKDistributedPublicKey())); } @@ -156,6 +167,11 @@ public class CryptUtilsUnitTest extends BaseTest { @Test public void testLoadGATKMasterPrivateKey() { + if ( gatkPrivateKeyExistsButReadPermissionDenied() ) { + throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key", + "testLoadGATKMasterPrivateKey")); + } + PrivateKey gatkMasterPrivateKey = CryptUtils.loadGATKMasterPrivateKey(); } @@ -174,4 +190,9 @@ public class CryptUtilsUnitTest extends BaseTest { Assert.assertEquals(originalKey.getAlgorithm(), keyFromDisk.getAlgorithm()); Assert.assertEquals(originalKey.getFormat(), keyFromDisk.getFormat()); } + + private boolean gatkPrivateKeyExistsButReadPermissionDenied() { + File gatkPrivateKey = new File(CryptUtils.GATK_MASTER_PRIVATE_KEY_FILE); + return gatkPrivateKey.exists() && ! gatkPrivateKey.canRead(); + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/crypt/GATKKeyUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/crypt/GATKKeyUnitTest.java index 5e7b07a1e..660f95796 100644 --- a/public/java/test/org/broadinstitute/sting/utils/crypt/GATKKeyUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/crypt/GATKKeyUnitTest.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.crypt; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.SkipException; import org.testng.annotations.Test; import org.testng.Assert; @@ -39,6 +40,11 @@ public class GATKKeyUnitTest extends BaseTest { @Test public void testCreateGATKKeyUsingMasterKeyPair() { + if ( gatkPrivateKeyExistsButReadPermissionDenied() ) { + throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key", + "testCreateGATKKeyUsingMasterKeyPair")); + } + PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey(); PublicKey masterPublicKey = CryptUtils.loadGATKMasterPublicKey(); @@ -49,6 +55,11 @@ public class GATKKeyUnitTest extends BaseTest { @Test public void testCreateGATKKeyUsingMasterPrivateKeyAndDistributedPublicKey() { + if ( gatkPrivateKeyExistsButReadPermissionDenied() ) { + throw new SkipException(String.format("Skipping test %s because we do not have permission to read the GATK private key", + "testCreateGATKKeyUsingMasterPrivateKeyAndDistributedPublicKey")); + } + PrivateKey masterPrivateKey = CryptUtils.loadGATKMasterPrivateKey(); PublicKey distributedPublicKey = CryptUtils.loadGATKDistributedPublicKey(); @@ -82,8 +93,7 @@ public class GATKKeyUnitTest extends BaseTest { KeyPair keyPair = CryptUtils.generateKeyPair(); // Email addresses cannot contain the NUL byte, since it's used as a sectional delimiter in the key file: - GATKKey key = new GATKKey(CryptUtils.loadGATKMasterPrivateKey(), CryptUtils.loadGATKDistributedPublicKey(), - emailAddressWithNulByte); + GATKKey key = new GATKKey(keyPair.getPrivate(), keyPair.getPublic(), emailAddressWithNulByte); } @Test @@ -110,4 +120,9 @@ public class GATKKeyUnitTest extends BaseTest { GATKKey key = new GATKKey(CryptUtils.loadGATKDistributedPublicKey(), nonExistentFile); } + + private boolean gatkPrivateKeyExistsButReadPermissionDenied() { + File gatkPrivateKey = new File(CryptUtils.GATK_MASTER_PRIVATE_KEY_FILE); + return gatkPrivateKey.exists() && ! gatkPrivateKey.canRead(); + } } From 569be953b905405dea6faaf5b0f44b9bce60e76a Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 6 Mar 2012 16:54:59 -0500 Subject: [PATCH 06/26] Bugfix for VariantEval -- We weren't properly handling the case where a site had both a SNP and indel in both eval and comp. These would naturally pair off as SNP x SNP and INDEL x INDEL in eval, but we'd still invoke update2 with (null, SNP) and (null, INDEL) resulting most conspicously as incorrect false negatives in the validation report. -- Updating misc. integrationtests, as the counting of comps (in particular for dbSNP) was inflated because of this effect. --- .../varianteval/VariantEvalWalker.java | 69 ++++++++++++------- .../varianteval/util/VariantEvalUtils.java | 2 - .../VariantEvalIntegrationTest.java | 10 +-- 3 files changed, 50 insertions(+), 31 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 74291e025..d18c7e10a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import com.google.java.contract.Requires; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.util.IntervalTree; import net.sf.samtools.SAMSequenceRecord; @@ -19,11 +20,8 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.IntervalStratification; -import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.JexlExpression; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; import org.broadinstitute.sting.gatk.walkers.varianteval.util.*; -import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche; -import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SampleUtils; @@ -32,7 +30,6 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; @@ -389,9 +386,9 @@ public class VariantEvalWalker extends RodWalker implements Tr nec.apply(tracker, ref, context, comp, eval); } - // eval=null against all comps of different type + // eval=null against all comps of different type that aren't bound to another eval for ( VariantContext otherComp : compSet ) { - if ( otherComp != comp ) { + if ( otherComp != comp && ! compHasMatchingEval(otherComp, evalSetBySample) ) { synchronized (nec) { nec.apply(tracker, ref, context, otherComp, null); } @@ -409,6 +406,35 @@ public class VariantEvalWalker extends RodWalker implements Tr return null; } + @Requires({"comp != null", "evals != null"}) + private boolean compHasMatchingEval(final VariantContext comp, final Collection evals) { + // find all of the matching comps + for ( final VariantContext eval : evals ) { + if ( eval != null && doEvalAndCompMatch(comp, eval, requireStrictAlleleMatch) != EvalCompMatchType.NO_MATCH ) + return true; + } + + // nothing matched + return false; + } + + private enum EvalCompMatchType { NO_MATCH, STRICT, LENIENT } + + @Requires({"eval != null", "comp != null"}) + private EvalCompMatchType doEvalAndCompMatch(final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) { + // find all of the matching comps + if ( comp.getType() != eval.getType() ) + return EvalCompMatchType.NO_MATCH; + + // find the comp which matches both the reference allele and alternate allele from eval + final Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0); + final Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0); + if ((altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp) && eval.getReference().equals(comp.getReference()))) + return EvalCompMatchType.STRICT; + else + return requireStrictAlleleMatch ? EvalCompMatchType.NO_MATCH : EvalCompMatchType.LENIENT; + } + private VariantContext findMatchingComp(final VariantContext eval, final Collection comps) { // if no comps, return null if ( comps == null || comps.isEmpty() ) @@ -419,26 +445,21 @@ public class VariantEvalWalker extends RodWalker implements Tr return comps.iterator().next(); // find all of the matching comps - List matchingComps = new ArrayList(comps.size()); - for ( VariantContext comp : comps ) { - if ( comp.getType() == eval.getType() ) - matchingComps.add(comp); + VariantContext lenientMatch = null; + for ( final VariantContext comp : comps ) { + switch ( doEvalAndCompMatch(comp, eval, requireStrictAlleleMatch) ) { + case STRICT: + return comp; + case LENIENT: + if ( lenientMatch == null ) lenientMatch = comp; + break; + case NO_MATCH: + ; + } } - // if no matching comp, return null - if ( matchingComps.size() == 0 ) - return null; - - // find the comp which matches both the reference allele and alternate allele from eval - Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0); - for ( VariantContext comp : matchingComps ) { - Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0); - if ( (altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp) && eval.getReference().equals(comp.getReference())) ) - return comp; - } - - // if none match, just return the first one unless we require a strict match - return (requireStrictAlleleMatch ? null : matchingComps.get(0)); + // nothing matched, just return lenientMatch, which might be null + return lenientMatch; } public Integer treeReduce(Integer lhs, Integer rhs) { return null; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index cb44ca522..fdeb6919d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -417,8 +417,6 @@ public class VariantEvalUtils { } } else { stateKeys.add(stateKey); - - return stateKeys; } return stateKeys; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 5c3a43c96..36c093e8f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -30,7 +30,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("f909fd8374f663e983b9b3fda4cf5cf1") + Arrays.asList("c8d8bffa5c572df9dec7364f71a1b943") ); executeTest("testFunctionClassWithSnpeff", spec); } @@ -277,7 +277,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + " --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", - 1, Arrays.asList("4f60acc8a4b21c4b4ccb51ad9071449c")); + 1, Arrays.asList("c49e239292704447a36e01ee9a71e729")); executeTestParallel("testSelect1", spec); } @@ -335,7 +335,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --dbsnp " + b37dbSNP132 + " --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("190e1a171132832bf92fbca56a9c40bb")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("e42cda858649a35eaa9d14ea2d70a956")); executeTestParallel("testEvalTrackWithoutGenotypes",spec); } @@ -347,7 +347,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("08586d443fdcf3b7f63b8f9e3a943c62")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9561cb4c7aa36dcf30ba253385299859")); executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); } @@ -463,7 +463,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("a6f8b32fa732632da13dfe3ddcc73cef") + Arrays.asList("397b0e77459b9b69d2e0dd1dac320c3c") ); executeTest("testModernVCFWithLargeIndels", spec); } From 0376d73ece4e337c1b29935f200fe9e478945d2a Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 7 Mar 2012 13:08:52 -0500 Subject: [PATCH 11/26] Improved, public version of ErrorRateByCycle -- A cleaner table output (molten). For those interested in seeing how this can be done with GATKReports look here for a nice clean example -- Integration tests -- Minor improvements to GATKReportTable with methods to getPrimaryKeys --- .../sting/gatk/report/GATKReportTable.java | 4 + .../diagnostics/ErrorRatePerCycle.java | 162 ++++++++++++++++++ .../diagnostics/ReadGroupProperties.java | 3 - .../ErrorRatePerCycleIntegrationTest.java | 41 +++++ 4 files changed, 207 insertions(+), 3 deletions(-) create mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java create mode 100644 public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index b72b20e0b..b59b550e1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -296,6 +296,10 @@ public class GATKReportTable { return primaryKeyColumn.contains(primaryKey); } + public Collection getPrimaryKeys() { + return Collections.unmodifiableCollection(primaryKeyColumn); + } + /** * Set the value for a given position in the table * diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java new file mode 100755 index 000000000..e7a2f74e2 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycle.java @@ -0,0 +1,162 @@ +package org.broadinstitute.sting.gatk.walkers.diagnostics; + +import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.report.GATKReport; +import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +import java.io.PrintStream; + +/** + * Computes the read error rate per position in read (in the original 5'->3' orientation that the read had coming off the machine) + * + * Emits a GATKReport containing readgroup, cycle, mismatches, counts, qual, and error rate for each read + * group in the input BAMs FOR ONLY THE FIRST OF PAIR READS. + * + *

Input

+ *

+ * Any number of BAM files + *

+ * + *

Output

+ *

+ * GATKReport containing readgroup, cycle, mismatches, counts, qual, and error rate. + * + * For example, running this tool on the NA12878 data sets: + * + *

+ *      ##:GATKReport.v0.2 ErrorRatePerCycle : The error rate per sequenced position in the reads
+ *      readgroup  cycle  mismatches  counts  qual  errorrate
+ *      20FUK.1        0          80   23368    25   3.47e-03
+ *      20FUK.1        1          40   23433    28   1.75e-03
+ *      20FUK.1        2          36   23453    28   1.58e-03
+ *      20FUK.1        3          26   23476    29   1.15e-03
+ *      20FUK.1        4          32   23495    29   1.40e-03
+ *      up to 101 cycles
+ *      20FUK.2        0          77   20886    24   3.73e-03
+ *      20FUK.2        1          28   20920    29   1.39e-03
+ *      20FUK.2        2          24   20931    29   1.19e-03
+ *      20FUK.2        3          30   20940    28   1.48e-03
+ *      20FUK.2        4          25   20948    29   1.24e-03
+ *      up to 101 cycles
+ *      20FUK.3        0          78   22038    24   3.58e-03
+ *      20FUK.3        1          40   22091    27   1.86e-03
+ *      20FUK.3        2          23   22108    30   1.09e-03
+ *      20FUK.3        3          36   22126    28   1.67e-03
+ *      
+ *

+ * + *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T ErrorRatePerCycle
+ *      -I bundle/current/b37/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam
+ *      -R bundle/current/b37/human_g1k_v37.fasta
+ *      -o example.gatkreport.txt
+ *  
+ * + * @author Kiran Garimella, Mark DePristo + */ +public class ErrorRatePerCycle extends LocusWalker { + @Output PrintStream out; + @Argument(fullName="min_base_quality_score", shortName="mbq", doc="Minimum base quality required to consider a base for calling", required=false) + public Integer MIN_BASE_QUAL = 0; + @Argument(fullName="min_mapping_quality_score", shortName="mmq", doc="Minimum read mapping quality required to consider a read for calling", required=false) + public Integer MIN_MAPPING_QUAL = 20; + + private GATKReport report; + private GATKReportTable table; + private final static String reportName = "ErrorRatePerCycle"; + private final static String reportDescription = "The error rate per sequenced position in the reads"; + + /** + * Allows us to use multiple records for the key (read group x cycle) + */ + private static class TableKey implements Comparable { + final String readGroup; + final int cycle; + + private TableKey(final String readGroup, final int cycle) { + this.readGroup = readGroup; + this.cycle = cycle; + } + + @Override + public int compareTo(final TableKey tableKey) { + final int scmp = readGroup.compareTo(tableKey.readGroup); + if ( scmp == 0 ) + return Integer.valueOf(cycle).compareTo(tableKey.cycle); + else + return scmp; + } + } + + public void initialize() { + report = new GATKReport(); + report.addTable(reportName, reportDescription); + table = report.getTable(reportName); + table.addPrimaryKey("key", false); + table.addColumn("readgroup", 0); + table.addColumn("cycle", 0); + table.addColumn("mismatches", 0); + table.addColumn("counts", 0); + table.addColumn("qual", 0); + table.addColumn("errorrate", 0.0f, "%.2e"); + } + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + for ( final PileupElement p : context.getBasePileup() ) { + final GATKSAMRecord read = p.getRead(); + final int offset = p.getOffset(); + final boolean firstOfPair = ! read.getReadPairedFlag() || read.getFirstOfPairFlag(); + + if ( firstOfPair && read.getMappingQuality() >= MIN_MAPPING_QUAL && p.getQual() >= MIN_BASE_QUAL ) { + final byte readBase = p.getBase(); + final byte refBase = ref.getBase(); + final int cycle = offset; + + if ( BaseUtils.isRegularBase(readBase) && BaseUtils.isRegularBase(refBase) ) { + final TableKey key = new TableKey(read.getReadGroup().getReadGroupId(), cycle); + + if ( ! table.containsKey(key) ) { + table.set(key, "cycle", cycle); + table.set(key, "readgroup", read.getReadGroup().getReadGroupId()); + } + + table.increment(key, "counts"); + if (readBase != refBase) + table.increment(key, "mismatches"); + } + } + } + + return null; + } + + public Integer reduceInit() { return null; } + + public Integer reduce(Integer value, Integer sum) { return null; } + + public void onTraversalDone(Integer sum) { + for ( final Object key : table.getPrimaryKeys() ) { + final int mismatches = (Integer)table.get(key, "mismatches"); + final int count = (Integer)table.get(key, "counts"); + final double errorRate = (mismatches + 1) / (1.0*(count + 1)); + final int qual = QualityUtils.probToQual(1-errorRate, 0.0); + table.set(key, "qual", qual); + table.set(key, "errorrate", errorRate); + } + + report.print(out); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java index d7a48d321..14985907d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/ReadGroupProperties.java @@ -94,9 +94,6 @@ import java.util.Map; * * @author Mark DePristo */ - - - public class ReadGroupProperties extends ReadWalker { @Output public PrintStream out; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java new file mode 100644 index 000000000..accb9c0cf --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diagnostics; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import java.util.Arrays; + +public class ErrorRatePerCycleIntegrationTest extends WalkerTest { + @Test + public void basicTest() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T ErrorRatePerCycle -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-10,100,000 -o %s", + 1, + Arrays.asList("0cc212ecb6df300e321784039ff29f13")); + executeTest("ErrorRatePerCycle:", spec); + } +} \ No newline at end of file From fbd2f04a04c63726024516ddeadf711afea7e8f5 Mon Sep 17 00:00:00 2001 From: Andrey Sivachenko Date: Wed, 7 Mar 2012 17:29:42 -0500 Subject: [PATCH 13/26] JEXL support added; intermediate commit, not yet functional --- .../indels/SomaticIndelDetectorWalker.java | 66 ++++++++++++++++++- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java index aa9ae1517..4247ab7a2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java @@ -26,6 +26,10 @@ package org.broadinstitute.sting.gatk.walkers.indels; import net.sf.samtools.*; +import org.apache.commons.jexl2.Expression; +import org.apache.commons.jexl2.JexlContext; +import org.apache.commons.jexl2.JexlEngine; +import org.apache.commons.jexl2.MapContext; import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -178,6 +182,10 @@ public class SomaticIndelDetectorWalker extends ReadWalker { "GENOMIC/UTR/INTRON/CODING and with the gene name", required=false) String RefseqFileName = null; + + @Argument(shortName="filter", doc="One or more criteria to use when selecting the data", required=false) + public ArrayList FILTER_EXPRESSIONS = new ArrayList(); + //@Argument(fullName="blacklistedLanes", shortName="BL", // doc="Name of lanes (platform units) that should be ignored. Reads coming from these lanes will never be seen "+ // "by this application, so they will not contribute indels to consider and will not be counted.", required=false) @@ -221,7 +229,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { private Writer verboseWriter = null; - private static String annGenomic = "GENOMIC"; + private static String annGenomic = "GENOMIC\t"; private static String annIntron = "INTRON"; private static String annUTR = "UTR"; private static String annCoding = "CODING"; @@ -245,6 +253,32 @@ public class SomaticIndelDetectorWalker extends ReadWalker { private long lastGenotypedPosition = -1; // last position on the currentGenotypeInterval, for which a call was already printed; // can be 1 base before lastGenotyped start + private JexlEngine jexlEngine = new JexlEngine(); + private ArrayList jexlExpressions = new ArrayList(); + + // the following arrays store indel source-specific (normal/tumor) metric names + // for fast access when populating JEXL expression contexts (see IndelPrecall.fillContext()) + private final static String[] normalMetricsCassette = new String[4]; + private final static String[] tumorMetricsCassette = new String[4]; + private final static String[] singleMetricsCassette = new String[4]; + private final static int C_COV=0; + private final static int C_CONS_CNT=1; + private final static int C_INDEL_F=2; + private final static int C_INDEL_CF=3; + static { + normalMetricsCassette[C_COV] = "N_COV"; + tumorMetricsCassette[C_COV] = "T_COV"; + singleMetricsCassette[C_COV] = "COV"; + normalMetricsCassette[C_CONS_CNT] = "N_CONS_CNT"; + tumorMetricsCassette[C_CONS_CNT] = "T_CONS_CNT"; + singleMetricsCassette[C_CONS_CNT] = "CONS_CNT"; + normalMetricsCassette[C_INDEL_F] = "N_INDEL_F"; + tumorMetricsCassette[C_INDEL_F] = "T_INDEL_F"; + singleMetricsCassette[C_INDEL_F] = "INDEL_F"; + normalMetricsCassette[C_INDEL_CF] = "N_INDEL_CF"; + tumorMetricsCassette[C_INDEL_CF] = "T_INDEL_CF"; + singleMetricsCassette[C_INDEL_CF] = "INDEL_CF"; + } // "/humgen/gsa-scr1/GATK_Data/refGene.sorted.txt" @@ -389,6 +423,17 @@ public class SomaticIndelDetectorWalker extends ReadWalker { vcf_writer.writeHeader(new VCFHeader(getVCFHeaderInfo(), SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()))) ; refData = new ReferenceDataSource(getToolkit().getArguments().referenceFile); + + // Now initialize JEXL expressions: + for ( String s : FILTER_EXPRESSIONS ) { + try { + Expression e = jexlEngine.createExpression(s); + jexlExpressions.add(e); + } catch (Exception e) { + throw new UserException.BadArgumentValue("Filter expression", "Invalid expression used (" + s + "). Please see the JEXL docs for correct syntax.") ; + } + + } } @@ -829,6 +874,15 @@ public class SomaticIndelDetectorWalker extends ReadWalker { IndelPrecall tumorCall = new IndelPrecall(tumor_context,pos,NQS_WIDTH); IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH); + JexlContext jc = new MapContext(); + tumorCall.fillContext(jc,tumorMetricsCassette); + normalCall.fillContext(jc,normalMetricsCassette); + boolean result = false; + + for ( Expression e : jexlExpressions ) { + if ( ((Boolean)e.evaluate(jc)).booleanValue() ) { result=true; break; } + } + if ( tumorCall.getCoverage() < minCoverage && ! genotype ) { if ( DEBUG ) { System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in tumor="+tumorCall.getCoverage()+" (SKIPPED)"); @@ -1602,6 +1656,13 @@ public class SomaticIndelDetectorWalker extends ReadWalker { public IndelVariant getVariant() { return consensus_indel; } + public void fillContext(JexlContext context,String[] cassette) { + context.set(cassette[C_INDEL_F],((double)consensus_indel_count)/total_coverage); + context.set(cassette[C_INDEL_CF],((double)consensus_indel_count/all_indel_count)); + context.set(cassette[C_COV],total_coverage); + context.set(cassette[C_CONS_CNT],consensus_indel_count); + } + public boolean isCall() { boolean ret = ( consensus_indel_count >= minIndelCount && (double)consensus_indel_count > minFraction * total_coverage && @@ -1610,8 +1671,9 @@ public class SomaticIndelDetectorWalker extends ReadWalker { " total_count="+all_indel_count+" cov="+total_coverage+ " minConsensusF="+((double)consensus_indel_count)/all_indel_count+ " minF="+((double)consensus_indel_count)/total_coverage); - return ret; + return ret; +// return true; } /** Utility method: finds the indel variant with the largest count (ie consensus) among all the observed From 497a1b059ef906d2f085b77a88732ff9eeca03d3 Mon Sep 17 00:00:00 2001 From: Andrey Sivachenko Date: Wed, 7 Mar 2012 18:34:11 -0500 Subject: [PATCH 14/26] transition to JEXL completed, old parameters setting individual cutoffs now deprecated --- .../indels/SomaticIndelDetectorWalker.java | 155 ++++++++++-------- 1 file changed, 90 insertions(+), 65 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java index 4247ab7a2..733d32e3d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java @@ -151,30 +151,44 @@ public class SomaticIndelDetectorWalker extends ReadWalker { doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false) java.io.File bedOutput = null; + @Deprecated @Argument(fullName="minCoverage", shortName="minCoverage", doc="indel calls will be made only at sites with tumor coverage of minCoverage or more reads; "+ - "with --unpaired (single sample) option, this value is used for minimum sample coverage", required=false) + "with --unpaired (single sample) option, this value is used for minimum sample coverage. "+ + "INSTEAD USE: T_COV { String RefseqFileName = null; - @Argument(shortName="filter", doc="One or more criteria to use when selecting the data", required=false) + @Argument(shortName="filter", doc="One or more logical expressions. If any of the expressions is TRUE, " + + "putative indel will be discarded and nothing will be printed into the output (unless genotyping "+ + "at the specific position is explicitly requested, see -genotype). "+ + "Default: T_COV<6||N_COV<4||T_INDEL_F<0.3||T_INDEL_CF<0.7", required=false) public ArrayList FILTER_EXPRESSIONS = new ArrayList(); //@Argument(fullName="blacklistedLanes", shortName="BL", @@ -425,6 +442,13 @@ public class SomaticIndelDetectorWalker extends ReadWalker { refData = new ReferenceDataSource(getToolkit().getArguments().referenceFile); // Now initialize JEXL expressions: + if ( FILTER_EXPRESSIONS.size() == 0 ) { + if ( call_unpaired ) { + FILTER_EXPRESSIONS.add("COV<6||INDEL_F<0.3||INDEL_CF<0.7"); + } else { + FILTER_EXPRESSIONS.add("T_COV<6||N_COV<4||T_INDEL_F<0.3||T_INDEL_CF<0.7"); + } + } for ( String s : FILTER_EXPRESSIONS ) { try { Expression e = jexlEngine.createExpression(s); @@ -706,14 +730,26 @@ public class SomaticIndelDetectorWalker extends ReadWalker { if ( normal_context.indelsAt(pos).size() == 0 && ! genotype ) continue; IndelPrecall normalCall = new IndelPrecall(normal_context,pos,NQS_WIDTH); + JexlContext jc = new MapContext(); + normalCall.fillContext(jc,singleMetricsCassette); + boolean discard_event = false; - if ( normalCall.getCoverage() < minCoverage && ! genotype ) { - if ( DEBUG ) { - System.out.println("DEBUG>> Indel at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)"); - } - continue; // low coverage + for ( Expression e : jexlExpressions ) { + if ( ((Boolean)e.evaluate(jc)).booleanValue() ) { discard_event=true; break; } } + if ( discard_event && ! genotype ) { + normal_context.indelsAt(pos).clear(); + continue; //* + } + +// if ( normalCall.getCoverage() < minCoverage && ! genotype ) { +// if ( DEBUG ) { +// System.out.println("DEBUG>> Indel at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)"); +// } +// continue; // low coverage +// } + if ( DEBUG ) System.out.println("DEBUG>> "+(normalCall.getAllVariantCount() == 0?"No Indel":"Indel")+" at "+pos); long left = Math.max( pos-NQS_WIDTH, normal_context.getStart() ); @@ -742,24 +778,16 @@ public class SomaticIndelDetectorWalker extends ReadWalker { location = getToolkit().getGenomeLocParser().createGenomeLoc(location.getContig(), pos); - boolean haveCall = normalCall.isCall(); // cache the value - - if ( haveCall || genotype) { - if ( haveCall ) normalCallsMade++; - printVCFLine(vcf_writer,normalCall); - if ( bedWriter != null ) normalCall.printBedLine(bedWriter); - if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall); - lastGenotypedPosition = pos; - } + if ( ! discard_event ) normalCallsMade++; + printVCFLine(vcf_writer,normalCall, discard_event); + if ( bedWriter != null ) normalCall.printBedLine(bedWriter); + if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, discard_event); + lastGenotypedPosition = pos; normal_context.indelsAt(pos).clear(); // we dealt with this indel; don't want to see it again // (we might otherwise in the case when 1) there is another indel that follows // within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel) - -// for ( IndelVariant var : variants ) { -// System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount()); -// } } if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to + " ("+adjustedPosition+")"); @@ -877,24 +905,29 @@ public class SomaticIndelDetectorWalker extends ReadWalker { JexlContext jc = new MapContext(); tumorCall.fillContext(jc,tumorMetricsCassette); normalCall.fillContext(jc,normalMetricsCassette); - boolean result = false; + boolean discard_event = false; for ( Expression e : jexlExpressions ) { - if ( ((Boolean)e.evaluate(jc)).booleanValue() ) { result=true; break; } + if ( ((Boolean)e.evaluate(jc)).booleanValue() ) { discard_event=true; break; } } - if ( tumorCall.getCoverage() < minCoverage && ! genotype ) { - if ( DEBUG ) { - System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in tumor="+tumorCall.getCoverage()+" (SKIPPED)"); - } - continue; // low coverage - } - if ( normalCall.getCoverage() < minNormalCoverage && ! genotype ) { - if ( DEBUG ) { - System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)"); - } - continue; // low coverage + if ( discard_event && ! genotype ) { + tumor_context.indelsAt(pos).clear(); + normal_context.indelsAt(pos).clear(); + continue; //* } +// if ( tumorCall.getCoverage() < minCoverage && ! genotype ) { +// if ( DEBUG ) { +// System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in tumor="+tumorCall.getCoverage()+" (SKIPPED)"); +// } +// continue; // low coverage +// } +// if ( normalCall.getCoverage() < minNormalCoverage && ! genotype ) { +// if ( DEBUG ) { +// System.out.println("DEBUG>> Indel in tumor at "+pos+"; coverare in normal="+normalCall.getCoverage()+" (SKIPPED)"); +// } +// continue; // low coverage +// } if ( DEBUG ) { System.out.print("DEBUG>> "+(tumorCall.getAllVariantCount() == 0?"No Indel":"Indel")+" in tumor, "); @@ -922,32 +955,24 @@ public class SomaticIndelDetectorWalker extends ReadWalker { if ( right > tumor_context.getStop() ) right = tumor_context.getStop(); // if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right -// location = getToolkit().getGenomeLocParser().setStart(location,pos); -// location = getToolkit().getGenomeLocParser().setStop(location,pos); // retrieve annotation data - location = getToolkit().getGenomeLocParser().createGenomeLoc(location.getContig(),pos); // retrieve annotation data - boolean haveCall = tumorCall.isCall(); // cache the value +// boolean haveCall = tumorCall.isCall(); // cache the value - if ( haveCall || genotype ) { - if ( haveCall ) tumorCallsMade++; + if ( ! discard_event ) tumorCallsMade++; - printVCFLine(vcf_writer,normalCall,tumorCall); + printVCFLine(vcf_writer,normalCall,tumorCall,discard_event); - if ( bedWriter != null ) tumorCall.printBedLine(bedWriter); + if ( bedWriter != null ) tumorCall.printBedLine(bedWriter); + + if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, tumorCall, discard_event ); + lastGenotypedPosition = pos; - if ( verboseWriter != null ) printVerboseLine(verboseWriter, normalCall, tumorCall ); - lastGenotypedPosition = pos; - } tumor_context.indelsAt(pos).clear(); normal_context.indelsAt(pos).clear(); // we dealt with this indel; don't want to see it again // (we might otherwise in the case when 1) there is another indel that follows // within MISMATCH_WIDTH bases and 2) we'd need to wait for more coverage for that next indel) - -// for ( IndelVariant var : variants ) { -// System.out.print("\t"+var.getType()+"\t"+var.getBases()+"\t"+var.getCount()); -// } } if ( DEBUG ) System.out.println("DEBUG>> Actual shift to " + move_to + " ("+adjustedPosition+")"); @@ -1001,14 +1026,14 @@ public class SomaticIndelDetectorWalker extends ReadWalker { } - public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall) { + public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, boolean discard_event) { RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location)); String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList)); StringBuilder fullRecord = new StringBuilder(); fullRecord.append(makeFullRecord(normalCall)); fullRecord.append(annotationString); - if ( ! normalCall.isCall() && normalCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL"); + if ( discard_event && normalCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL"); try { verboseWriter.write(fullRecord.toString()); verboseWriter.write('\n'); @@ -1019,7 +1044,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { } - public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, IndelPrecall tumorCall) { + public void printVerboseLine(Writer verboseWriter, IndelPrecall normalCall, IndelPrecall tumorCall, boolean discard_event) { RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(location)); String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList)); @@ -1067,7 +1092,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { fullRecord.append('\t'); fullRecord.append(annotationString); - if ( ! tumorCall.isCall() && tumorCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL"); + if ( discard_event && tumorCall.getVariant() != null ) fullRecord.append("\tFILTERED_NOCALL"); try { verboseWriter.write(fullRecord.toString()); @@ -1077,7 +1102,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { } } - public void printVCFLine(VCFWriter vcf, IndelPrecall call) { + public void printVCFLine(VCFWriter vcf, IndelPrecall call, boolean discard_event) { long start = call.getPosition()-1; // If the beginning of the chromosome is deleted (possible, however unlikely), it's unclear how to proceed. @@ -1114,14 +1139,14 @@ public class SomaticIndelDetectorWalker extends ReadWalker { Map attrs = call.makeStatsAttributes(null); - if ( call.isCall() ) // we made a call - put actual het genotype here: + if ( ! discard_event ) // we made a call - put actual het genotype here: genotypes.add(new Genotype(sample,alleles,Genotype.NO_LOG10_PERROR,null,attrs,false)); else // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all) genotypes.add(new Genotype(sample, homref_alleles,Genotype.NO_LOG10_PERROR,null,attrs,false)); } Set filters = null; - if ( call.getVariant() != null && ! call.isCall() ) { + if ( call.getVariant() != null && discard_event ) { filters = new HashSet(); filters.add("NoCall"); } @@ -1149,7 +1174,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { } } - public void printVCFLine(VCFWriter vcf, IndelPrecall nCall, IndelPrecall tCall) { + public void printVCFLine(VCFWriter vcf, IndelPrecall nCall, IndelPrecall tCall, boolean discard_event) { long start = tCall.getPosition()-1; long stop = start; @@ -1166,7 +1191,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { Map attrs = new HashMap(); boolean isSomatic = false; - if ( nCall.getCoverage() >= minNormalCoverage && nCall.getVariant() == null && tCall.getVariant() != null ) { + if ( nCall.getVariant() == null && tCall.getVariant() != null ) { isSomatic = true; attrs.put(VCFConstants.SOMATIC_KEY,true); } @@ -1209,7 +1234,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { } Set filters = null; - if ( tCall.getVariant() != null && ! tCall.isCall() ) { + if ( tCall.getVariant() != null && discard_event ) { filters = new HashSet(); filters.add("NoCall"); } @@ -1662,7 +1687,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { context.set(cassette[C_COV],total_coverage); context.set(cassette[C_CONS_CNT],consensus_indel_count); } - +/* public boolean isCall() { boolean ret = ( consensus_indel_count >= minIndelCount && (double)consensus_indel_count > minFraction * total_coverage && @@ -1675,7 +1700,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { return ret; // return true; } - +*/ /** Utility method: finds the indel variant with the largest count (ie consensus) among all the observed * variants, and sets the counts of consensus observations and all observations of any indels (including non-consensus) * @param variants From 56f074b520e83e13ead8bb12d57541f12491498a Mon Sep 17 00:00:00 2001 From: Andrey Sivachenko Date: Wed, 7 Mar 2012 18:47:15 -0500 Subject: [PATCH 15/26] docs updated --- .../walkers/indels/SomaticIndelDetectorWalker.java | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java index 733d32e3d..59a7bd01a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java @@ -75,7 +75,7 @@ import java.util.*; *

* This is a simple, counts-and-cutoffs based tool for calling indels from aligned (preferrably MSA cleaned) sequencing * data. Supported output formats are: BED format, extended verbose output (tab separated), and VCF. The latter two outputs - * include additional statistics such as mismtaches and base qualitites around the calls, read strandness (how many + * include additional statistics such as mismatches and base qualitites around the calls, read strandness (how many * forward/reverse reads support ref and indel alleles) etc. It is highly recommended to use these additional * statistics to perform post-filtering of the calls as the tool is tuned for sensitivity (in other words it will * attempt to "call" anything remotely reasonable based only on read counts and will generate all the additional @@ -92,6 +92,16 @@ import java.util.*; * bam tagging is not required in this case, and tags are completely ignored if still used: all input bams will be merged * on the fly and assumed to represent a single sample - this tool does not check for sample id in the read groups). * + * Which (putative) calls will make it into the output file(s) is controlled by an expression/list of expressions passed with -filter + * flag: if any of the expressions evaluate to TRUE, the site will be discarded. Otherwise the putative call and all the + * associated statistics will be printed into the output. Expressions recognize the following variables(in paired-sample + * somatic mode variables are prefixed with T_ and N_ for Tumor and Normal, e.g. N_COV and T_COV are defined instead of COV): + * COV for coverage at the site, INDEL_F for fraction of reads supporting consensus indel at the site (wrt total coverage), + * INDEL_CF for fraction of reads with consensus indel wrt all reads with an indel at the site, CONS_CNT for the count of + * reads supporting the consensus indel at the site. Conventional arithmetic and logical operations are supported. For instance, + * N_COV<4||T_COV<6||T_INDEL_F<0.3||T_INDEL_CF<0.7 instructs the tool to only output indel calls with at least 30% observed + * allelic fraction and with consensus indel making at least 70% of all indel observations at the site, and only at the sites + * where tumor coverage and normal coverage are at least 6 and 4, respectively. *

Input

*

* Tumor and normal bam files (or single sample bam file(s) in --unpaired mode). From 858acf86165fe0466529a16ddb4d0a25647a69e8 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 8 Mar 2012 12:29:44 -0500 Subject: [PATCH 16/26] Hidden mode in ValidationAmplicons to support ILMN output format (same as Sequenom, with just shuffled columns) --- .../validation/ValidationAmplicons.java | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java index e812fb53a..1d7f92242 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java @@ -134,6 +134,10 @@ public class ValidationAmplicons extends RodWalker { @Argument(doc="Use Sequenom output format instead of regular FASTA",fullName="sqnm",required=false) boolean sequenomOutput = false; + @Hidden + @Argument(doc="Use ILMN output format instead of regular FASTA",fullName="ilmn",required=false) + boolean ilmnOutput = false; + GenomeLoc prevInterval; GenomeLoc allelePos; @@ -141,6 +145,7 @@ public class ValidationAmplicons extends RodWalker { StringBuilder sequence; StringBuilder rawSequence; boolean sequenceInvalid; + boolean isSiteSNP; List invReason; int indelCounter; @@ -169,6 +174,9 @@ public class ValidationAmplicons extends RodWalker { header.setSequenceDictionary(referenceDictionary); header.setSortOrder(SAMFileHeader.SortOrder.unsorted); } + + if (ilmnOutput) + out.println("Locus_Name,Target_Type,Sequence,Chromosome,Coordinate,Genome_Build_Version,Source,Source_Version,Sequence_Orientation,Plus_Minus,Force_Infinium_I"); } public Integer reduceInit() { @@ -234,6 +242,8 @@ public class ValidationAmplicons extends RodWalker { } rawSequence.append(Character.toUpperCase((char) ref.getBase())); } else if ( validate != null ) { + // record variant type in case it's needed in output format + isSiteSNP = (validate.isSNP()); // doesn't matter if there's a mask here too -- this is what we want to validate if ( validate.isFiltered() ) { logger.warn("You are attempting to validate a filtered site. Why are you attempting to validate a filtered site? You should not be attempting to validate a filtered site."); @@ -496,13 +506,19 @@ public class ValidationAmplicons extends RodWalker { if (!onlyOutputValidAmplicons || !sequenceInvalid) { String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D'); - if (!sequenomOutput) - out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity); - else { + if (sequenomOutput) { seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record probeName = probeName.replace("amplicon_","a"); out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity); } + else if (ilmnOutput) { + String type = isSiteSNP?"SNP":"INDEL"; + seqIdentity = seqIdentity.replace("*",""); // no * in ref allele + out.printf("%s,%s,%s,%s,%d,37,1000G,ExomePhase1,Forward,Plus,FALSE%n",probeName,type,seqIdentity,allelePos.getContig(),allelePos.getStart()); + } + else{ + out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity); + } } } } From 32dee7ed9bd0c64bdd6dae5de26b751dea05750c Mon Sep 17 00:00:00 2001 From: David Roazen Date: Thu, 8 Mar 2012 12:52:11 -0500 Subject: [PATCH 17/26] Avoid buffer underflow in GATKBAMIndex by detecting premature EOF in BAM indices GATKBAMIndex would allow an extremely confusing BufferUnderflowException to be thrown when a BAM index file was truncated or corrupt. Now, a UserException is thrown in this situation instructing the user to re-index the BAM. Added a unit test for this case as well. --- .../sting/gatk/datasources/reads/GATKBAMIndex.java | 14 +++++++++++++- .../datasources/reads/GATKBAMIndexUnitTest.java | 13 +++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java index 244438a59..2bf75b035 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.datasources.reads; import net.sf.samtools.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; import java.io.FileInputStream; @@ -349,7 +350,18 @@ public class GATKBAMIndex { private void read(final ByteBuffer buffer) { try { - fileChannel.read(buffer); + int bytesExpected = buffer.limit(); + int bytesRead = fileChannel.read(buffer); + + // We have a rigid expectation here to read in exactly the number of bytes we've limited + // our buffer to -- if we read in fewer bytes than this, or encounter EOF (-1), the index + // must be truncated or otherwise corrupt: + if ( bytesRead < bytesExpected ) { + throw new UserException.MalformedFile(mFile, String.format("Premature end-of-file while reading BAM index file %s. " + + "It's likely that this file is truncated or corrupt -- " + + "Please try re-indexing the corresponding BAM file.", + mFile)); + } } catch(IOException ex) { throw new ReviewedStingException("Index: unable to read bytes from index file " + mFile); diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndexUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndexUnitTest.java index fde0ce62f..8cf9f7ce0 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndexUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndexUnitTest.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.datasources.reads; import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMSequenceDictionary; import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -91,4 +92,16 @@ public class GATKBAMIndexUnitTest extends BaseTest { Assert.assertEquals(bamIndex.getLevelSize(5),37448-4681+1); } + @Test( expectedExceptions = UserException.MalformedFile.class ) + public void testDetectTruncatedBamIndexWordBoundary() { + GATKBAMIndex index = new GATKBAMIndex(new File(validationDataLocation + "truncated_at_word_boundary.bai")); + index.readReferenceSequence(0); + } + + @Test( expectedExceptions = UserException.MalformedFile.class ) + public void testDetectTruncatedBamIndexNonWordBoundary() { + GATKBAMIndex index = new GATKBAMIndex(new File(validationDataLocation + "truncated_at_non_word_boundary.bai")); + index.readReferenceSequence(0); + } + } From bc65f6326f79d09757461291c017378e6eaa6ffd Mon Sep 17 00:00:00 2001 From: David Roazen Date: Fri, 9 Mar 2012 12:13:53 -0500 Subject: [PATCH 18/26] Detect incomplete reads from BAM schedule file in BAMSchedule before they become buffer underflows This fix is similar, but distinct from the earlier fix to GATKBAMIndex. If we fail to read in a complete 3-integer bin header from the BAM schedule file that the engine has written, throw a ReviewedStingException (since this is our problem, not the user's) rather than allowing a cryptic buffer underflow error to occur. Note that this change does not fix the underlying problem in the engine, if there is one (there may be an as-yet-undetected bug in the code that writes the bam schedule). It will just make it easier for us to identify what's going wrong in the future. --- .../sting/gatk/datasources/reads/BAMSchedule.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMSchedule.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMSchedule.java index 657c70aaa..1d8879d51 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMSchedule.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMSchedule.java @@ -407,7 +407,14 @@ public class BAMSchedule implements CloseableIterator { position(currentPosition); // Read data. - read(binHeader); + int binHeaderBytesRead = read(binHeader); + + // Make sure we read in a complete bin header: + if ( binHeaderBytesRead < INT_SIZE_IN_BYTES * 3 ) { + throw new ReviewedStingException(String.format("Unable to read a complete bin header from BAM schedule file %s for BAM file %s. " + + "The BAM schedule file is likely incomplete/corrupt.", + scheduleFile.getAbsolutePath(), reader.getSamFilePath())); + } // Decode contents. binHeader.flip(); From 91d10431d395389d137a9571b3fd75d579f241a3 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Fri, 9 Mar 2012 15:11:59 -0500 Subject: [PATCH 19/26] BAMScheduler: detect contigs from the interval list that are not in the merged BAM header's sequence dictionary This is a quick-and-dirty patch for the null pointer error Mauricio reported earlier. Later on we might want to address in a more general way the fact that we validate user intervals against the reference but not against the merged BAM header produced by the engine at runtime. --- .../sting/gatk/datasources/reads/BAMScheduler.java | 11 ++++++++++- .../sting/utils/exceptions/UserException.java | 13 ++++--------- .../broadinstitute/sting/utils/sam/ReadUtils.java | 8 ++++++++ 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java index bcb726607..fdc3d2aa7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java @@ -34,6 +34,8 @@ import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.ReadUtils; import java.util.*; @@ -245,7 +247,14 @@ public class BAMScheduler implements Iterator { // This will ensure that if the two sets of contigs don't quite match (b36 male vs female ref, hg19 Epstein-Barr), then // we'll be using the correct contig index for the BAMs. // TODO: Warning: assumes all BAMs use the same sequence dictionary! Get around this with contig aliasing. - final int currentContigIndex = dataSource.getHeader().getSequence(currentLocus.getContig()).getSequenceIndex(); + SAMSequenceRecord currentContigSequenceRecord = dataSource.getHeader().getSequence(currentLocus.getContig()); + if ( currentContigSequenceRecord == null ) { + throw new UserException(String.format("Contig %s not present in sequence dictionary for merged BAM header: %s", + currentLocus.getContig(), + ReadUtils.prettyPrintSequenceRecords(dataSource.getHeader().getSequenceDictionary()))); + } + + final int currentContigIndex = currentContigSequenceRecord.getSequenceIndex(); // Stale reference sequence or first invocation. (Re)create the binTreeIterator. if(lastReferenceSequenceLoaded == null || lastReferenceSequenceLoaded != currentContigIndex) { diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 6cc8008d2..d625cec20 100755 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -30,6 +30,7 @@ import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; @@ -273,7 +274,7 @@ public class UserException extends ReviewedStingException { public static class IncompatibleSequenceDictionaries extends UserException { public IncompatibleSequenceDictionaries(String message, String name1, SAMSequenceDictionary dict1, String name2, SAMSequenceDictionary dict2) { super(String.format("Input files %s and %s have incompatible contigs: %s.\n %s contigs = %s\n %s contigs = %s", - name1, name2, message, name1, prettyPrintSequenceRecords(dict1), name2, prettyPrintSequenceRecords(dict2))); + name1, name2, message, name1, ReadUtils.prettyPrintSequenceRecords(dict1), name2, ReadUtils.prettyPrintSequenceRecords(dict2))); } } @@ -284,17 +285,11 @@ public class UserException extends ReviewedStingException { + "\nThis is because all distributed GATK resources are sorted in karyotypic order, and your processing will fail when you need to use these files." + "\nYou can use the ReorderSam utility to fix this problem: http://www.broadinstitute.org/gsa/wiki/index.php/ReorderSam" + "\n %s contigs = %s", - name, name, prettyPrintSequenceRecords(dict))); + name, name, ReadUtils.prettyPrintSequenceRecords(dict))); } } - private static String prettyPrintSequenceRecords(SAMSequenceDictionary sequenceDictionary) { - String[] sequenceRecordNames = new String[sequenceDictionary.size()]; - int sequenceRecordIndex = 0; - for (SAMSequenceRecord sequenceRecord : sequenceDictionary.getSequences()) - sequenceRecordNames[sequenceRecordIndex++] = sequenceRecord.getSequenceName(); - return Arrays.deepToString(sequenceRecordNames); - } + public static class MissingWalker extends UserException { public MissingWalker(String walkerName, String message) { diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index d1e3ce26b..91389f0bf 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -648,4 +648,12 @@ public class ReadUtils { } return new Pair>, HashMap>(locusToReadMap, readToLocusMap); } + + public static String prettyPrintSequenceRecords ( SAMSequenceDictionary sequenceDictionary ) { + String[] sequenceRecordNames = new String[sequenceDictionary.size()]; + int sequenceRecordIndex = 0; + for (SAMSequenceRecord sequenceRecord : sequenceDictionary.getSequences()) + sequenceRecordNames[sequenceRecordIndex++] = sequenceRecord.getSequenceName(); + return Arrays.deepToString(sequenceRecordNames); + } } From 1011f3862ba7ac3d7b4cebb4f28e1603c84be6f1 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 8 Mar 2012 08:57:29 -0500 Subject: [PATCH 21/26] CalibrateGenotypeLikelihoods now emits the position of the variant for debugging -- Refactored some duplicated code (FYI, code duplication = root of all evil) into shared functions -- Added long-missing integrationtests -- CHRIS/RYAN -- it would be very good to add an integration test covering external VCF files as I believe we rely on this functionality and it's not tested at all --- public/java/test/org/broadinstitute/sting/BaseTest.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index bc4ce098b..e33f6717a 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -61,6 +61,8 @@ public abstract class BaseTest { public static final String annotationDataLocation = GATKDataLocation + "Annotations/"; public static final String b37GoodBAM = validationDataLocation + "/CEUTrio.HiSeq.b37.chr20.10_11mb.bam"; + public static final String b37GoodNA12878BAM = validationDataLocation + "/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam"; + public static final String b37_NA12878_OMNI = validationDataLocation + "/NA12878.omni.vcf"; public static final String refseqAnnotationLocation = annotationDataLocation + "refseq/"; public static final String hg18Refseq = refseqAnnotationLocation + "refGene-big-table-hg18.txt"; From 3ba2e5667c30190096504ebe6d505c78d9eee3d9 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 8 Mar 2012 09:43:24 -0500 Subject: [PATCH 22/26] CalibrateGenotypesLikelihoods include pOfDGivenD now --- .../java/src/org/broadinstitute/sting/utils/QualityUtils.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java index 9722f901b..7756ac71b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java @@ -10,6 +10,8 @@ import net.sf.samtools.SAMUtils; */ public class QualityUtils { public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE; + public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE); + public final static double MIN_REASONABLE_ERROR = 0.0001; public final static byte MAX_REASONABLE_Q_SCORE = 40; public final static byte MIN_USABLE_Q_SCORE = 6;