diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java new file mode 100755 index 000000000..b5f7d7233 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -0,0 +1,79 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; +import org.broadinstitute.sting.utils.genotype.Variation; +import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; +import org.broadinstitute.sting.utils.genotype.Genotype; +import org.broadinstitute.sting.utils.genotype.SampleBacked; +import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; + +import java.util.Map; +import java.util.List; +import java.util.ArrayList; + + +public class QualByDepth extends StandardVariantAnnotation { + + public String annotate(ReferenceContext ref, Map stratifiedContexts, Variation variation) { + if ( !(variation instanceof VariantBackedByGenotype) ) + return null; + final List genotypes = ((VariantBackedByGenotype)variation).getGenotypes(); + if ( genotypes == null || genotypes.size() == 0 ) + return null; + + //double QbyD = genotypeQualByDepth(ref.getBase(), genotypes, stratifiedContexts); + double QbyD = 10.0 * variation.getNegLog10PError() / (double)variationQualByDepth(ref.getBase(), genotypes, stratifiedContexts); + + return String.format("%.2f", QbyD); + } + + public String getKeyName() { return "QD"; } + + public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine(getKeyName(), 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Genotype Quality by Depth"); } + + private int variationQualByDepth(char ref, final List genotypes, Map stratifiedContexts) { + int depth = 0; + for ( Genotype genotype : genotypes ) { + if ( !(genotype instanceof SampleBacked) ) + continue; + + // we care only about variant calls + if ( !genotype.isVariant(ref) ) + continue; + + String sample = ((SampleBacked)genotype).getSampleName(); + StratifiedAlignmentContext context = stratifiedContexts.get(sample); + if ( context != null ) + depth += context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size(); + } + + return depth; + } + + private double genotypeQualByDepth(char ref, final List genotypes, Map stratifiedContexts) { + ArrayList qualsByDepth = new ArrayList(); + for ( Genotype genotype : genotypes ) { + if ( !(genotype instanceof SampleBacked) ) + continue; + + // we care only about variant calls + if ( !genotype.isVariant(ref) ) + continue; + + String sample = ((SampleBacked)genotype).getSampleName(); + StratifiedAlignmentContext context = stratifiedContexts.get(sample); + if ( context == null ) + continue; + + qualsByDepth.add(genotype.getNegLog10PError() / context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size()); + } + + double mean = 0.0; + for ( Double value : qualsByDepth ) + mean += value; + mean /= qualsByDepth.size(); + + return mean; + } +} \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index b8e2a0788..949f6778d 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("da1e19a969cd79779ba401062d2fcf78")); + Arrays.asList("b0012ffce509196885bfdf1612086263")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("592e6eb532fa7b0250bfa410b62b1ae4")); + Arrays.asList("7e00e94a046e82ac41f6fd298a64cc74")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("0413be573f447450222a48223dd10095")); + Arrays.asList("a6630eec84161a089dde204e09744c45")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("0f0ef13834e2ef00fa2ea53ad82450b4")); + Arrays.asList("130658564083f650981b28728b813917")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 6244854a1..49c9a175d 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -22,7 +22,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("b2e7854a474fbd24add057673ba469f1")); + Arrays.asList("caeb030b47503e9d79cf1e18b86e8bc9")); executeTest("testMultiSamplePilot1 - Point Estimate EM", spec); } @@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("c1ea2a36a7dacb278d82be9963dc6053")); + Arrays.asList("f87c182b694a7baeab886d8f75c91e28")); executeTest("testMultiSamplePilot2 - Point Estimate EM", spec); } @@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testPooled1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1, - Arrays.asList("c2364a23fcca9d08c7424e7a68c3ba04")); + Arrays.asList("acf8006174a460247fabbc650802c29b")); executeTest("testPooled1", spec); } @@ -56,7 +56,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("293ea49e8c6854aa04099a48293a2c65")); + Arrays.asList("92b32599938bc60d6d636b425c5e0a6c")); executeTest("testMultiSamplePilot1 - Joint Estimate", spec); } @@ -64,7 +64,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("44362bd78c9559c194149fa0be20004b")); + Arrays.asList("cc90c3dd5d30dd4ac0fceb748283ddb9")); executeTest("testMultiSamplePilot2 - Joint Estimate", spec); } @@ -72,7 +72,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("9013c944a894d1d941167c2d1d57f0f9")); + Arrays.asList("d04c02cdbf1e1adbdf84540c861a64f7")); executeTest("testSingleSamplePilot2 - Joint Estimate", spec); } @@ -85,11 +85,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testParameter() { HashMap e = new HashMap(); - e.put( "-genotype", "9f7886f0f63de56ebcd187c22725b1f7" ); - e.put( "-all_bases", "17f08368b48ebb5e18063da47f689e4e" ); - e.put( "--min_base_quality_score 10", "cdac609651438aa8e74cb859aa04e2f0" ); - e.put( "--min_mapping_quality_score 10", "ee1afd51eda0d005e5b3e908af63eb27" ); - e.put( "--max_mismatches_in_40bp_window 5", "b9bec75e1aa4c3f45e0a88ee74b41321" ); + e.put( "-genotype", "ce888ec4bdd85e24b1129198327ff315" ); + e.put( "-all_bases", "95be4777a8f89f98d0a033fc3d75a3c1" ); + e.put( "--min_base_quality_score 10", "390802b120cba6019af39d4775f504d1" ); + e.put( "--min_mapping_quality_score 10", "f7fe79dace81157bc83c8e4d27e1ae40" ); + e.put( "--max_mismatches_in_40bp_window 5", "0e7741a7a683a6d4d00876372bb70991" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -103,7 +103,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 10 ", 1, - Arrays.asList("022cf14445f7cd6beb04957c8fea9ae5")); + Arrays.asList("c8c4a463ab23585d8373f3e8a7fbec22")); executeTest("testConfidence", spec); }