diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/HapmapPoolAllelicInfoWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/HapmapPoolAllelicInfoWalker.java index bdb598d30..98e188fac 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/HapmapPoolAllelicInfoWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/HapmapPoolAllelicInfoWalker.java @@ -34,9 +34,11 @@ public class HapmapPoolAllelicInfoWalker extends LocusWalker { @Argument(fullName="lodThreshold", shortName="lod", doc="Threshold for log likelihood ratio to be called a SNP. Defaults to 3.0", required = false) @@ -159,17 +163,20 @@ public class PowerBelowFrequencyWalker extends LocusWalker { int kaccept = (int) Math.ceil( ( lodThreshold - ( (double) depth ) * ( dkterm_num - dkterm_denom ) ) / ( kterm_num - kterm_denom- dkterm_num + dkterm_denom ) ); + System.out.println("Error="+p_error+" snpProp="+snpProp+" alleles="+alleles+" lodThreshold="+lodThreshold+" kaccept="+kaccept); - // we will reject the null hypothesis if we see kaccept or more SNPs, the power is the probability that this occurs - // we can optimize this by checking to see which sum is smaller - - if ( depth - kaccept < kaccept ) {// kaccept > depth/2 - calculate power as P[hits between kaccept and depth] - power = MathUtils.cumBinomialProbLog(kaccept, depth, depth, snpProp); - } else { // kaccept < depth/2 - calculate power as 1-P[hits between 0 and kaccept] - power = 1-MathUtils.cumBinomialProbLog(0,kaccept,depth,snpProp); + if (kaccept <= 0) { + power = 0.0; + } else { + // we will reject the null hypothesis if we see kaccept or more SNPs, the power is the probability that this occurs + // we can optimize this by checking to see which sum is smaller + if ( depth - kaccept < kaccept ) {// kaccept > depth/2 - calculate power as P[hits between kaccept and depth] + power = MathUtils.cumBinomialProbLog(kaccept, depth, depth, snpProp); + } else { // kaccept < depth/2 - calculate power as 1-P[hits between 0 and kaccept] + power = 1-MathUtils.cumBinomialProbLog(0,kaccept,depth,snpProp); + } } } - return power; } diff --git a/java/src/org/broadinstitute/sting/utils/MathUtils.java b/java/src/org/broadinstitute/sting/utils/MathUtils.java index ccde99503..4fcd28092 100755 --- a/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -2,6 +2,8 @@ package org.broadinstitute.sting.utils; import cern.jet.math.Arithmetic; +import java.math.BigDecimal; + /** * MathUtils is a static class (no instantiation allowed!) with some useful math methods. * @@ -180,11 +182,22 @@ public class MathUtils { */ public static double cumBinomialProbLog(int start, int end, int total, double probHit) { double cumProb = 0.0; + double prevProb; + BigDecimal probCache = BigDecimal.ZERO; + for(int hits = start; hits < end; hits++) { - cumProb += binomialProbabilityLog(hits, total, probHit); + prevProb = cumProb; + double probability = binomialProbabilityLog(hits,total,probHit); + cumProb += probability; + if ( probability > 0 && cumProb - prevProb < probability/2 ) { // loss of precision + probCache = probCache.add(new BigDecimal(prevProb)); + cumProb = 0.0; + hits--; // repeat loop + // prevProb changes at start of loop + } } - return cumProb; + return probCache.add(new BigDecimal(cumProb)).doubleValue(); } /** diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/HapmapPoolAllelicInfoIntegrationTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/HapmapPoolAllelicInfoIntegrationTest.java index 8b74efcea..a19f03977 100755 --- a/java/test/org/broadinstitute/sting/playground/gatk/walkers/HapmapPoolAllelicInfoIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/HapmapPoolAllelicInfoIntegrationTest.java @@ -3,6 +3,8 @@ package org.broadinstitute.sting.playground.gatk.walkers; import org.broadinstitute.sting.WalkerTest; import org.junit.Test; +import java.util.Arrays; + /** * Created by IntelliJ IDEA. * User: chartl @@ -12,48 +14,44 @@ import org.junit.Test; */ public class HapmapPoolAllelicInfoIntegrationTest extends WalkerTest { - @Test + @Test public void testFHSPool3() { - // Help! I'm broken! - - /*String test_args = "-T HapmapPoolAllelicInfo -samples /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_samples.txt " + String test_args = "-T HapmapPoolAllelicInfo -samples /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_samples.txt " + "-B /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_sample_paths.txt " + "-B calls,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_raw_calls.geli " + "-I /humgen/gsa-scr1/GATK_Data/Validation_Data/FHSP_pool3_test.bam " + "-R /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta -of %s " - + "-ps 40 -L /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_test_intervals.interval_list"; + + "-ps 40 -L /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_test_intervals.interval_list -q 0"; String md5ForThisTest = "da1222d7514f247eae066134d8e3cde3"; WalkerTestSpec spec = new WalkerTestSpec(test_args, 1, Arrays.asList(md5ForThisTest)); - executeTest("Pool 3 of FHS Pilot on testbed intervals", spec); */ + executeTest("Pool 3 of FHS Pilot on testbed intervals", spec); } - // todo -- chris must fix -// @Test -// public void testFHSPool3NoIntervals() { -// String test_args = "-T HapmapPoolAllelicInfo -samples /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_samples.txt " -// + "-B /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_sample_paths.txt " -// + "-B calls,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_raw_calls.geli " -// + "-I /humgen/gsa-scr1/GATK_Data/Validation_Data/FHSP_pool3_test.bam " -// + "-R /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta -of %s " -// + "-ps 40"; -// String md5ForThisTest = "120f3307d94d613c3559a1051fe3aaef"; -// WalkerTestSpec spec = new WalkerTestSpec(test_args, 1, Arrays.asList(md5ForThisTest)); -// executeTest("Pool 3 of FHS Pilot without testbed intervals", spec); -// } + @Test + public void testFHSPool3NoIntervals() { + String test_args = "-T HapmapPoolAllelicInfo -samples /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_samples.txt " + + "-B /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_sample_paths.txt " + + "-B calls,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_raw_calls.geli " + + "-I /humgen/gsa-scr1/GATK_Data/Validation_Data/FHSP_pool3_test.bam " + + "-R /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta -of %s " + + "-ps 40 -q 0"; + String md5ForThisTest = "9ee3f390657c32c9c4fbe654bcd3cdf9"; + WalkerTestSpec spec = new WalkerTestSpec(test_args, 1, Arrays.asList(md5ForThisTest)); + executeTest("Testing hapmap sites without reads at them (no testbed intervals)", spec); + } @Test public void testSmallPool() { - // Help! I'm broken! - /*String test_args = "-T HapmapPoolAllelicInfo -samples /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878_NA12891_samples.txt " + String test_args = "-T HapmapPoolAllelicInfo -samples /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878_NA12891_samples.txt " +"-B NA12878,GFF,/humgen/gsa-scr1/andrewk/hapmap_1kg/gffs/NA12878.gff " +"-B NA12891,GFF,/humgen/gsa-scr1/andrewk/hapmap_1kg/gffs/NA12891.gff " +"-I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12891.a2b.recal.annotation_subset.bam " +"-L chr1:14000000-18000000 -B calls,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12891.a2b.ssg1b.geli.calls " - +"-of %s -R /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta -ps 2"; + +"-of %s -R /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta -ps 2 -q 0"; String md5ForThisTest = "cc81a9d8279e7b6ad50f9dc1f19a7419"; WalkerTestSpec spec = new WalkerTestSpec(test_args, 1, Arrays.asList(md5ForThisTest)); executeTest("Test on pool of two individuals -- CEU father+daughter chips against subset of the father's calls; power at lod 3", spec); - */ + } } diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/poolseq/PowerTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/poolseq/PowerTest.java new file mode 100644 index 000000000..3301a0f29 --- /dev/null +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/poolseq/PowerTest.java @@ -0,0 +1,75 @@ +package org.broadinstitute.sting.playground.gatk.walkers.poolseq; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; +import org.junit.Test; +import org.junit.Assert; + +import java.math.BigDecimal; + + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: Dec 14, 2009 + * Time: 10:19:53 AM + * To change this template use File | Settings | File Templates. + */ +public class PowerTest extends BaseTest { + // test for various items involved in PowerBelowFrequency + @Test + public void testBinomialProbabilityLog() { + logger.warn("Testing binomial probability in log space"); + Assert.assertTrue(MathUtils.compareDoubles(MathUtils.binomialProbabilityLog(5,7,0.3),0.02500,1e-4)==0); + Assert.assertTrue(MathUtils.compareDoubles(MathUtils.binomialProbabilityLog(8000,15000,0.6),2.56e-62,1e-64) == 0); + Assert.assertTrue(MathUtils.compareDoubles(MathUtils.binomialProbabilityLog(12000,20000,0.5),7.34e-178,1e-180) == 0); + Assert.assertTrue(MathUtils.compareDoubles(MathUtils.binomialProbabilityLog(600,800,0.75),0.0326,0.0001) == 0); + Assert.assertTrue(MathUtils.compareDoubles(MathUtils.binomialProbabilityLog(50,500,0.2),6.404e-10,1e-13)==0); + Assert.assertTrue(MathUtils.compareDoubles(MathUtils.binomialProbabilityLog(5277,8279,0.60344),1.6023145e-11,1e-18) == 0); + } + + @Test + public void testBinomialCDF() { + logger.warn("Testing binomial CDF"); + double test1 = MathUtils.cumBinomialProbLog(0,5277,8279,0.64344); + System.out.println("Binomial CDF -- Test 1: "+test1); + Assert.assertTrue(MathUtils.compareDoubles(test1,0.127,5e-3) == 0); + double test2 = MathUtils.cumBinomialProbLog(0,1472,9886,0.172); + System.out.println("Binomial CDF -- Test 2: "+test2); + Assert.assertTrue(MathUtils.compareDoubles(test2,3.1e-10,5e-11)==0); + double test3 = MathUtils.cumBinomialProbLog(0,1472,9886,0.142); + System.out.println("Binomial CDF -- Test 3: "+test3); + Assert.assertTrue(MathUtils.compareDoubles(test3,0.975,5e-3)==0); + double test4 = MathUtils.cumBinomialProbLog(0,6781,21297,0.321742); + System.out.println("Binomial CDF -- Test 4:"+test4); + Assert.assertTrue(MathUtils.compareDoubles(test4,0.150,5e-3)==0); + double test5 = MathUtils.cumBinomialProbLog(0,10609,21297,0.5001); + System.out.println("Binomial CDF == Test 5:"+test5); + Assert.assertTrue(MathUtils.compareDoubles(test5,0.286,5e-3)==0); + } + + + +/* @Test + public void testPowerCalculation() { + PowerBelowFrequencyWalker w = new PowerBelowFrequencyWalker(); + w.initialize(); + logger.warn("Testing power on pool of 100 people"); + w.setPoolSize(100); + // TEST 1 + double error = 0.000123; + byte q = QualityUtils.probToQual(1-error); + double pow = w.theoreticalPower(1000,q,1,5); + System.out.println("Qual for "+error+"="+q+" and power="+pow); + Assert.assertTrue(MathUtils.compareDoubles(pow,0.7356,1e-4) == 0); + // TEST 2 + error = 0.00512; + q = QualityUtils.probToQual(1-error); + double err2 = QualityUtils.qualToErrorProb(q); + pow = w.theoreticalPower(40,q,12,3); + System.out.println("Qual for "+error+"="+q+" back to error="+err2+" and power="+pow); + Assert.assertTrue(MathUtils.compareDoubles(pow,0.2173,1e-4) == 0); + + }*/ +}