From ce3d2261836418760ae6fc39838970b09a214f3d Mon Sep 17 00:00:00 2001 From: rpoplin Date: Thu, 13 Jan 2011 14:12:30 +0000 Subject: [PATCH] Reverting back to the old definition of QD because it works better with large numbers of samples. The new QD is relegated to a new annotation: sumGLbyD. Tweaks to the new HaplotypeScore based on evaluation with better QD calculation. The default qual threshold in GenerateVariantClusters is updated to be in line with the variant quality scores coming from the exact model. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4984 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/annotator/HaplotypeScore.java | 2 +- .../gatk/walkers/annotator/QualByDepth.java | 13 +++++--- .../GenerateVariantClustersWalker.java | 2 +- .../sting/utils/pileup/PileupElement.java | 6 +--- .../sting/utils/sam/AlignmentUtils.java | 11 ------- .../VariantAnnotatorIntegrationTest.java | 10 +++--- .../UnifiedGenotyperIntegrationTest.java | 32 +++++++++---------- 7 files changed, 33 insertions(+), 43 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index ed5d2360f..3719dceb2 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -46,7 +46,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils; public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation { private final static boolean DEBUG = false; - private final static int MIN_CONTEXT_WING_SIZE = 20; + private final static int MIN_CONTEXT_WING_SIZE = 10; private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50; private final static char REGEXP_WILDCARD = '.'; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index d0d0cd6ef..7046da8ea 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -18,7 +18,7 @@ import java.util.List; import java.util.Arrays; -public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation { +public class QualByDepth extends AnnotationByDepth implements InfoFieldAnnotation, StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) @@ -55,13 +55,18 @@ public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation { if ( qual == 0.0 ) qual = 10.0 * vc.getNegLog10PError(); - double QbyD = qual / (double)depth; + double sumGLbyD = qual / (double)depth; + + int qDepth = annotationByVariantDepth(genotypes, stratifiedContexts); + double QD = 10.0 * vc.getNegLog10PError() / (double)qDepth; + Map map = new HashMap(); - map.put(getKeyNames().get(0), String.format("%.2f", QbyD)); + map.put(getKeyNames().get(0), String.format("%.2f", QD)); + map.put(getKeyNames().get(1), String.format("%.2f", sumGLbyD)); return map; } - public List getKeyNames() { return Arrays.asList("QD"); } + public List getKeyNames() { return Arrays.asList("QD", "sumGLbyD"); } public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth")); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java index 9818e6058..83aec2b93 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java @@ -84,7 +84,7 @@ public class GenerateVariantClustersWalker extends RodWalker 0 ) { - if(alignment[alignPos-1] == (byte) 'A') { - alignment[alignPos-1] = PileupElement.INSERTION_BASE_A; - } else if(alignment[alignPos-1] == (byte) 'C') { - alignment[alignPos-1] = PileupElement.INSERTION_BASE_C; - } else if(alignment[alignPos-1] == (byte) 'T') { - alignment[alignPos-1] = PileupElement.INSERTION_BASE_T; - } else if(alignment[alignPos-1] == (byte) 'G') { - alignment[alignPos-1] = PileupElement.INSERTION_BASE_G; - } - } case S: for ( int jjj = 0; jjj < elementLength; jjj++ ) { readPos++; diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 6545128e0..cfebe7dd2 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -31,7 +31,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("98861116ac1cdaf152adead3183664d8")); + Arrays.asList("c62ee353f7ddafa2348b461d9e107f2f")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -39,7 +39,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("53d083505fc82fb388566d3856ed20ba")); + Arrays.asList("0c43cfb9b08f63f49396e3bf8b3d843f")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -63,7 +63,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("c5807c5794c3a847d78e3800553d989a")); + Arrays.asList("3aa1415f7bc6d41c4f14343187377f96")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -71,7 +71,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("a9b2a7adee7ba8b0f5d7ff8d92e6dfbd")); + Arrays.asList("bf3bc299552b595f791486a1f8dbc509")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } @@ -79,7 +79,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testOverwritingHeader() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1, - Arrays.asList("e3c839910aa82061742e33196b112cb0")); + Arrays.asList("28829de1afd9dcb41ed666f949c4f5cc")); executeTest("test overwriting header", spec); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index dba3eb8fe..54fec9cb0 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -25,7 +25,7 @@ public class public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("ae901e034b00aef16d36295821b3ea63")); + Arrays.asList("6c04cc01cf6bebe4bdbc025fd45c5559")); executeTest("testMultiSamplePilot1", spec); } @@ -33,7 +33,7 @@ public class public void testMultiSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1, - Arrays.asList("2ad026dee3fe592c124eb8724a843a5e")); + Arrays.asList("84efe068164891dbec7c85ff6cc33df3")); executeTest("testMultiSamplePilot2", spec); } @@ -41,7 +41,7 @@ public class public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("bc573090407ae9ec4401757eaa03de20")); + Arrays.asList("c7decebadb35067a38b9be3c43c1fb76")); executeTest("testSingleSamplePilot2", spec); } @@ -51,7 +51,7 @@ public class // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "1889ecc5aa1b23b2e77b1bd192577f1a"; + private final static String COMPRESSED_OUTPUT_MD5 = "66bcb3f7b20d4fbe7849b616bfe69c0f"; @Test public void testCompressedOutput() { @@ -78,7 +78,7 @@ public class @Test public void testParallelization() { - String md5 = "d19745ab31f903de8d5a8e853b4e52dd"; + String md5 = "a0032a9952c94a55e0fa42e2dec33169"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, @@ -105,12 +105,12 @@ public class @Test public void testParameter() { HashMap e = new HashMap(); - e.put( "-genotype", "37d3954e19309a24c386758afad93252" ); - e.put( "-all_bases", "04568093c5dc70fa7965b4ab15fd0f7e" ); - e.put( "--min_base_quality_score 26", "5d1886a9637183707645bc2dc6bf8282" ); - e.put( "--min_mapping_quality_score 26", "78423524cf56cce1d0847847d542459f" ); - e.put( "--max_mismatches_in_40bp_window 5", "2963c771aafe84b62082f475d20bad5e" ); - e.put( "--p_nonref_model GRID_SEARCH", "c254a4e593b4ffb112299be874503ce0" ); + e.put( "-genotype", "4a623d4a68fba4f9f8d7e915af0cc450" ); + e.put( "-all_bases", "2ab5106795c70a63d8dd2353ee0f1427" ); + e.put( "--min_base_quality_score 26", "9c9be22923b9ba0d588f3d37385ae4b0" ); + e.put( "--min_mapping_quality_score 26", "aa36bb2f4fc5bd6b6c4206e0a084a577" ); + e.put( "--max_mismatches_in_40bp_window 5", "4914ed25a8ca22c7aea983999cd6768a" ); + e.put( "--p_nonref_model GRID_SEARCH", "dd4fb1fb304e44b82c9cc3d4cc257172" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -124,12 +124,12 @@ public class public void testConfidence() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, - Arrays.asList("c254a4e593b4ffb112299be874503ce0")); + Arrays.asList("dd4fb1fb304e44b82c9cc3d4cc257172")); executeTest("testConfidence1", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1, - Arrays.asList("d2323a0234d27257393f4931fca70dbc")); + Arrays.asList("eef4e314d0cdae669454232ffbbab8ea")); executeTest("testConfidence2", spec2); } @@ -141,8 +141,8 @@ public class @Test public void testHeterozyosity() { HashMap e = new HashMap(); - e.put( 0.01, "96e85b26cf5f0a523b4b0886dbb902b1" ); - e.put( 1.0 / 1850, "5ffecb68b52169ded04312aa5dcdc137" ); + e.put( 0.01, "1bbd2e24ddec902339eac481565d7f0a" ); + e.put( 1.0 / 1850, "472d2d6c375a7c6d2edb464a12f10742" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -165,7 +165,7 @@ public class " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("e65c95a9d2a0995078d3e6835cf4ee61")); + Arrays.asList("eb4144006eda780d69b6979817ceb58d")); executeTest(String.format("testMultiTechnologies"), spec); }