diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index 9fc7879f8..86d95ee20 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -21,8 +21,8 @@ public class AlleleBalance extends StandardVariantAnnotation { if ( genotypes == null || genotypes.size() == 0 ) return null; - int refCount = 0; - int altCount = 0; + double ratio = 0.0; + double totalWeights = 0.0; for ( Genotype genotype : genotypes ) { // we care only about het calls if ( !genotype.isHet() ) @@ -49,17 +49,23 @@ public class AlleleBalance extends StandardVariantAnnotation { int aCount = Utils.countOccurrences(a, bases); int bCount = Utils.countOccurrences(b, bases); + int refCount = (a == ref.getBase() ? aCount : bCount); + int altCount = (a == ref.getBase() ? bCount : aCount); + + // sanity check + if ( refCount + altCount == 0 ) + continue; + // weight the allele balance by genotype quality so that e.g. mis-called homs don't affect the ratio too much - refCount += genotype.getNegLog10PError() * (a == ref.getBase() ? aCount : bCount); - altCount += genotype.getNegLog10PError() * (a == ref.getBase() ? bCount : aCount); + ratio += genotype.getNegLog10PError() * ((double)refCount / (double)(refCount + altCount)); + totalWeights += genotype.getNegLog10PError(); } - // sanity check - if ( refCount + altCount == 0 ) + // make sure we had a het genotype + if ( MathUtils.compareDoubles(totalWeights, 0.0) == 0 ) return null; - double ratio = (double)refCount / (double)(refCount + altCount); - return String.format("%.2f", ratio); + return String.format("%.2f", (ratio / totalWeights)); } public String getKeyName() { return "AB"; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java new file mode 100755 index 000000000..87857ca76 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java @@ -0,0 +1,29 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; + +import java.util.List; + + +public class BaseQualityRankSumTest extends RankSumTest { + + public String getKeyName() { return "BaseQRankSum"; } + + public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("BaseQRankSum", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Het Vs. Ref Base Qualities"); } + + protected void fillQualsFromPileup(char ref, char alt, ReadBackedPileup pileup, List refQuals, List altQuals) { + for ( PileupElement p : pileup ) { + // ignore deletions + if ( p.isDeletion() ) + continue; + + char base = (char)p.getBase(); + if ( base == ref ) + refQuals.add((int)p.getQual()); + else if ( base == alt ) + altQuals.add((int)p.getQual()); + } + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java new file mode 100755 index 000000000..69c0e3273 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java @@ -0,0 +1,29 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; + +import java.util.List; + + +public class MappingQualityRankSumTest extends RankSumTest { + + public String getKeyName() { return "MQRankSum"; } + + public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("MQRankSum", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Het Vs. Ref Read Mapping Qualities"); } + + protected void fillQualsFromPileup(char ref, char alt, ReadBackedPileup pileup, List refQuals, List altQuals) { + for ( PileupElement p : pileup ) { + // ignore deletions + if ( p.isDeletion() ) + continue; + + char base = (char)p.getBase(); + if ( base == ref ) + refQuals.add(p.getMappingQual()); + else if ( base == alt ) + altQuals.add(p.getMappingQual()); + } + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java old mode 100755 new mode 100644 index b3abbd9ac..b8f660b4f --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -5,16 +5,14 @@ import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.genotype.*; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import java.util.List; import java.util.ArrayList; import java.util.Map; -public class RankSumTest implements VariantAnnotation { +public abstract class RankSumTest implements VariantAnnotation { private final static boolean DEBUG = false; private static final double minPValue = 1e-10; @@ -68,21 +66,5 @@ public class RankSumTest implements VariantAnnotation { return String.format("%.1f", QualityUtils.phredScaleErrorRate(pvalue)); } - public String getKeyName() { return "RankSum"; } - - public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("RankSum", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Phred-scaled p-value From Wilcoxon Rank Sum Test of Het Vs. Ref Base Qualities"); } - - private void fillQualsFromPileup(char ref, char alt, ReadBackedPileup pileup, List refQuals, List altQuals) { - for ( PileupElement p : pileup ) { - // ignore deletions - if ( p.isDeletion() ) - continue; - - char base = (char)p.getBase(); - if ( base == ref ) - refQuals.add((int)p.getQual()); - else if ( base == alt ) - altQuals.add((int)p.getQual()); - } - } + protected abstract void fillQualsFromPileup(char ref, char alt, ReadBackedPileup pileup, List refQuals, List altQuals); } \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 4a82cbbc1..e39ad35b3 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("e3282acfec015441dfc38715d5bfe530")); + Arrays.asList("514d6357ae4970a422d0481f57f52e0c")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("17119c073a964620809eaaf3a39dfbc4")); + Arrays.asList("903c0a9642a34ed19dadee72ddcf4189")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("20943a3666ceef605d9154b6f2ff2fa1")); + Arrays.asList("90861b8af4514f8e64e4d93fe0fcd304")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("e865132880c6aa07bfdd725408fc21e8")); + Arrays.asList("0e9c65b14ef694bb3f246e58515dbdce")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 7ee1d6022..c4a54e75b 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -56,7 +56,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("0078a4e6611cbfc7225f52209b87c4aa")); + Arrays.asList("eee834f4242d72fffb09cd248bd8ed86")); executeTest("testMultiSamplePilot1 - Joint Estimate", spec); } @@ -64,7 +64,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("9c862cf36124710648d6d8b83882c603")); + Arrays.asList("ee5c07b4e57966bf173dea100f1e89e0")); executeTest("testMultiSamplePilot2 - Joint Estimate", spec); } @@ -72,7 +72,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("4f6aeda8903be49344353b79ee7763b4")); + Arrays.asList("15d2e6040933704547a39a531869f36d")); executeTest("testSingleSamplePilot2 - Joint Estimate", spec); } @@ -85,7 +85,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testParallelization() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,400,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30 -nt 4", 1, - Arrays.asList("dbaff01edcfc8d83efa937f5887b26c2")); + Arrays.asList("463c5c9cf9a10f8b737184c10c199783")); executeTest("test parallelization", spec); } @@ -98,11 +98,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testParameter() { HashMap e = new HashMap(); - e.put( "-genotype", "c6abe711dade515e72b671d7e0d6e724" ); - e.put( "-all_bases", "d9705e4c89f333f3fa6e771716db57e4" ); + e.put( "-genotype", "b69545bb707edbb033307aab0aecd85a" ); + e.put( "-all_bases", "7a2a691e8eda96e28c0868c87bba9402" ); e.put( "--min_base_quality_score 26", "015509aca82954b89d7dc83134f2103e" ); - e.put( "--min_mapping_quality_score 26", "a3926d2d24f9b4b4d772243a50ebdd59" ); - e.put( "--max_mismatches_in_40bp_window 5", "ab592c8d0996d9bc4bb6685eaf874883" ); + e.put( "--min_mapping_quality_score 26", "227bff531007ea610577be5b2696c8e4" ); + e.put( "--max_mismatches_in_40bp_window 5", "7b294de04436a2362c5536fa8af83da4" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -116,7 +116,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 10 ", 1, - Arrays.asList("15af52b80e2be8e8d2d489c3f86bdadc")); + Arrays.asList("c2e4ebd66901d6160dc38cdaa29b89f3")); executeTest("testConfidence", spec); }