Added - new tests (Hapmap was re-added)
Modified - Hapmap now takes a -q command to filter out variants by quality Modified - MathUtils - cumBinomialProbLog now uses BigDecimal to handle some numerical imprecisions Modified - PowerBelowFrequency - returns 0.0 if called with a negative number (can't be done from inside the walker itself, but since it's called elsewhere one can't be too careful) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2350 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8e44bfd2ef
commit
b42fc905e8
|
|
@ -34,9 +34,11 @@ public class HapmapPoolAllelicInfoWalker extends LocusWalker<String, PrintWriter
|
|||
public int poolSize = -1;
|
||||
@Argument(fullName="sampleNames", shortName="samples", doc="Sample name bindings", required=true)
|
||||
public String sampleNameFile = null;
|
||||
@Argument(fullName="minCallQuality", shortName="q", doc="Ignore calls with below this quality, defaults to -1")
|
||||
public double minCallQ = -1;
|
||||
|
||||
private PrintWriter output;
|
||||
|
||||
private static double EPSILON = Math.pow(10,-4);
|
||||
private String[] sampleNames = null;
|
||||
private PowerBelowFrequencyWalker powerWalker = null;
|
||||
private ConcordanceTruthTable ctt = null;
|
||||
|
|
@ -83,7 +85,7 @@ public class HapmapPoolAllelicInfoWalker extends LocusWalker<String, PrintWriter
|
|||
Variation call = (Variation) tracker.lookup("calls",null);
|
||||
if ( call == null ) {
|
||||
called = 0;
|
||||
} else if ( call.isReference() ) {
|
||||
} else if ( call.isReference() || call.getNegLog10PError() < minCallQ-EPSILON ) {
|
||||
called = 0;
|
||||
} else {
|
||||
called = 1;
|
||||
|
|
|
|||
|
|
@ -19,11 +19,15 @@ import java.util.List;
|
|||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: Ghost
|
||||
* User: chartl
|
||||
* Date: Oct 8, 2009
|
||||
* Time: 9:44:35 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Given an input N, this walker calculates the power to detect a polymorphism with N, N-1, N-2, ..., 1 variant alleles in a pooled setting
|
||||
*/
|
||||
@By(DataSource.REFERENCE)
|
||||
public class PowerBelowFrequencyWalker extends LocusWalker<Integer,Integer> {
|
||||
@Argument(fullName="lodThreshold", shortName="lod", doc="Threshold for log likelihood ratio to be called a SNP. Defaults to 3.0", required = false)
|
||||
|
|
@ -159,17 +163,20 @@ public class PowerBelowFrequencyWalker extends LocusWalker<Integer,Integer> {
|
|||
|
||||
int kaccept = (int) Math.ceil( ( lodThreshold - ( (double) depth ) * ( dkterm_num - dkterm_denom ) ) /
|
||||
( kterm_num - kterm_denom- dkterm_num + dkterm_denom ) );
|
||||
System.out.println("Error="+p_error+" snpProp="+snpProp+" alleles="+alleles+" lodThreshold="+lodThreshold+" kaccept="+kaccept);
|
||||
|
||||
// we will reject the null hypothesis if we see kaccept or more SNPs, the power is the probability that this occurs
|
||||
// we can optimize this by checking to see which sum is smaller
|
||||
|
||||
if ( depth - kaccept < kaccept ) {// kaccept > depth/2 - calculate power as P[hits between kaccept and depth]
|
||||
power = MathUtils.cumBinomialProbLog(kaccept, depth, depth, snpProp);
|
||||
} else { // kaccept < depth/2 - calculate power as 1-P[hits between 0 and kaccept]
|
||||
power = 1-MathUtils.cumBinomialProbLog(0,kaccept,depth,snpProp);
|
||||
if (kaccept <= 0) {
|
||||
power = 0.0;
|
||||
} else {
|
||||
// we will reject the null hypothesis if we see kaccept or more SNPs, the power is the probability that this occurs
|
||||
// we can optimize this by checking to see which sum is smaller
|
||||
if ( depth - kaccept < kaccept ) {// kaccept > depth/2 - calculate power as P[hits between kaccept and depth]
|
||||
power = MathUtils.cumBinomialProbLog(kaccept, depth, depth, snpProp);
|
||||
} else { // kaccept < depth/2 - calculate power as 1-P[hits between 0 and kaccept]
|
||||
power = 1-MathUtils.cumBinomialProbLog(0,kaccept,depth,snpProp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return power;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@ package org.broadinstitute.sting.utils;
|
|||
|
||||
import cern.jet.math.Arithmetic;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
|
||||
/**
|
||||
* MathUtils is a static class (no instantiation allowed!) with some useful math methods.
|
||||
*
|
||||
|
|
@ -180,11 +182,22 @@ public class MathUtils {
|
|||
*/
|
||||
public static double cumBinomialProbLog(int start, int end, int total, double probHit) {
|
||||
double cumProb = 0.0;
|
||||
double prevProb;
|
||||
BigDecimal probCache = BigDecimal.ZERO;
|
||||
|
||||
for(int hits = start; hits < end; hits++) {
|
||||
cumProb += binomialProbabilityLog(hits, total, probHit);
|
||||
prevProb = cumProb;
|
||||
double probability = binomialProbabilityLog(hits,total,probHit);
|
||||
cumProb += probability;
|
||||
if ( probability > 0 && cumProb - prevProb < probability/2 ) { // loss of precision
|
||||
probCache = probCache.add(new BigDecimal(prevProb));
|
||||
cumProb = 0.0;
|
||||
hits--; // repeat loop
|
||||
// prevProb changes at start of loop
|
||||
}
|
||||
}
|
||||
|
||||
return cumProb;
|
||||
return probCache.add(new BigDecimal(cumProb)).doubleValue();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@ package org.broadinstitute.sting.playground.gatk.walkers;
|
|||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
|
|
@ -12,48 +14,44 @@ import org.junit.Test;
|
|||
*/
|
||||
public class HapmapPoolAllelicInfoIntegrationTest extends WalkerTest {
|
||||
|
||||
@Test
|
||||
@Test
|
||||
public void testFHSPool3() {
|
||||
// Help! I'm broken!
|
||||
|
||||
/*String test_args = "-T HapmapPoolAllelicInfo -samples /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_samples.txt "
|
||||
String test_args = "-T HapmapPoolAllelicInfo -samples /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_samples.txt "
|
||||
+ "-B /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_sample_paths.txt "
|
||||
+ "-B calls,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_raw_calls.geli "
|
||||
+ "-I /humgen/gsa-scr1/GATK_Data/Validation_Data/FHSP_pool3_test.bam "
|
||||
+ "-R /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta -of %s "
|
||||
+ "-ps 40 -L /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_test_intervals.interval_list";
|
||||
+ "-ps 40 -L /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_test_intervals.interval_list -q 0";
|
||||
String md5ForThisTest = "da1222d7514f247eae066134d8e3cde3";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(test_args, 1, Arrays.asList(md5ForThisTest));
|
||||
executeTest("Pool 3 of FHS Pilot on testbed intervals", spec); */
|
||||
executeTest("Pool 3 of FHS Pilot on testbed intervals", spec);
|
||||
}
|
||||
|
||||
// todo -- chris must fix
|
||||
// @Test
|
||||
// public void testFHSPool3NoIntervals() {
|
||||
// String test_args = "-T HapmapPoolAllelicInfo -samples /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_samples.txt "
|
||||
// + "-B /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_sample_paths.txt "
|
||||
// + "-B calls,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_raw_calls.geli "
|
||||
// + "-I /humgen/gsa-scr1/GATK_Data/Validation_Data/FHSP_pool3_test.bam "
|
||||
// + "-R /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta -of %s "
|
||||
// + "-ps 40";
|
||||
// String md5ForThisTest = "120f3307d94d613c3559a1051fe3aaef";
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(test_args, 1, Arrays.asList(md5ForThisTest));
|
||||
// executeTest("Pool 3 of FHS Pilot without testbed intervals", spec);
|
||||
// }
|
||||
@Test
|
||||
public void testFHSPool3NoIntervals() {
|
||||
String test_args = "-T HapmapPoolAllelicInfo -samples /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_samples.txt "
|
||||
+ "-B /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_sample_paths.txt "
|
||||
+ "-B calls,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_raw_calls.geli "
|
||||
+ "-I /humgen/gsa-scr1/GATK_Data/Validation_Data/FHSP_pool3_test.bam "
|
||||
+ "-R /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta -of %s "
|
||||
+ "-ps 40 -q 0";
|
||||
String md5ForThisTest = "9ee3f390657c32c9c4fbe654bcd3cdf9";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(test_args, 1, Arrays.asList(md5ForThisTest));
|
||||
executeTest("Testing hapmap sites without reads at them (no testbed intervals)", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSmallPool() {
|
||||
// Help! I'm broken!
|
||||
|
||||
/*String test_args = "-T HapmapPoolAllelicInfo -samples /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878_NA12891_samples.txt "
|
||||
String test_args = "-T HapmapPoolAllelicInfo -samples /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878_NA12891_samples.txt "
|
||||
+"-B NA12878,GFF,/humgen/gsa-scr1/andrewk/hapmap_1kg/gffs/NA12878.gff "
|
||||
+"-B NA12891,GFF,/humgen/gsa-scr1/andrewk/hapmap_1kg/gffs/NA12891.gff "
|
||||
+"-I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12891.a2b.recal.annotation_subset.bam "
|
||||
+"-L chr1:14000000-18000000 -B calls,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12891.a2b.ssg1b.geli.calls "
|
||||
+"-of %s -R /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta -ps 2";
|
||||
+"-of %s -R /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta -ps 2 -q 0";
|
||||
String md5ForThisTest = "cc81a9d8279e7b6ad50f9dc1f19a7419";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(test_args, 1, Arrays.asList(md5ForThisTest));
|
||||
executeTest("Test on pool of two individuals -- CEU father+daughter chips against subset of the father's calls; power at lod 3", spec);
|
||||
*/
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,75 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.poolseq;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.junit.Test;
|
||||
import org.junit.Assert;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Dec 14, 2009
|
||||
* Time: 10:19:53 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class PowerTest extends BaseTest {
|
||||
// test for various items involved in PowerBelowFrequency
|
||||
@Test
|
||||
public void testBinomialProbabilityLog() {
|
||||
logger.warn("Testing binomial probability in log space");
|
||||
Assert.assertTrue(MathUtils.compareDoubles(MathUtils.binomialProbabilityLog(5,7,0.3),0.02500,1e-4)==0);
|
||||
Assert.assertTrue(MathUtils.compareDoubles(MathUtils.binomialProbabilityLog(8000,15000,0.6),2.56e-62,1e-64) == 0);
|
||||
Assert.assertTrue(MathUtils.compareDoubles(MathUtils.binomialProbabilityLog(12000,20000,0.5),7.34e-178,1e-180) == 0);
|
||||
Assert.assertTrue(MathUtils.compareDoubles(MathUtils.binomialProbabilityLog(600,800,0.75),0.0326,0.0001) == 0);
|
||||
Assert.assertTrue(MathUtils.compareDoubles(MathUtils.binomialProbabilityLog(50,500,0.2),6.404e-10,1e-13)==0);
|
||||
Assert.assertTrue(MathUtils.compareDoubles(MathUtils.binomialProbabilityLog(5277,8279,0.60344),1.6023145e-11,1e-18) == 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBinomialCDF() {
|
||||
logger.warn("Testing binomial CDF");
|
||||
double test1 = MathUtils.cumBinomialProbLog(0,5277,8279,0.64344);
|
||||
System.out.println("Binomial CDF -- Test 1: "+test1);
|
||||
Assert.assertTrue(MathUtils.compareDoubles(test1,0.127,5e-3) == 0);
|
||||
double test2 = MathUtils.cumBinomialProbLog(0,1472,9886,0.172);
|
||||
System.out.println("Binomial CDF -- Test 2: "+test2);
|
||||
Assert.assertTrue(MathUtils.compareDoubles(test2,3.1e-10,5e-11)==0);
|
||||
double test3 = MathUtils.cumBinomialProbLog(0,1472,9886,0.142);
|
||||
System.out.println("Binomial CDF -- Test 3: "+test3);
|
||||
Assert.assertTrue(MathUtils.compareDoubles(test3,0.975,5e-3)==0);
|
||||
double test4 = MathUtils.cumBinomialProbLog(0,6781,21297,0.321742);
|
||||
System.out.println("Binomial CDF -- Test 4:"+test4);
|
||||
Assert.assertTrue(MathUtils.compareDoubles(test4,0.150,5e-3)==0);
|
||||
double test5 = MathUtils.cumBinomialProbLog(0,10609,21297,0.5001);
|
||||
System.out.println("Binomial CDF == Test 5:"+test5);
|
||||
Assert.assertTrue(MathUtils.compareDoubles(test5,0.286,5e-3)==0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* @Test
|
||||
public void testPowerCalculation() {
|
||||
PowerBelowFrequencyWalker w = new PowerBelowFrequencyWalker();
|
||||
w.initialize();
|
||||
logger.warn("Testing power on pool of 100 people");
|
||||
w.setPoolSize(100);
|
||||
// TEST 1
|
||||
double error = 0.000123;
|
||||
byte q = QualityUtils.probToQual(1-error);
|
||||
double pow = w.theoreticalPower(1000,q,1,5);
|
||||
System.out.println("Qual for "+error+"="+q+" and power="+pow);
|
||||
Assert.assertTrue(MathUtils.compareDoubles(pow,0.7356,1e-4) == 0);
|
||||
// TEST 2
|
||||
error = 0.00512;
|
||||
q = QualityUtils.probToQual(1-error);
|
||||
double err2 = QualityUtils.qualToErrorProb(q);
|
||||
pow = w.theoreticalPower(40,q,12,3);
|
||||
System.out.println("Qual for "+error+"="+q+" back to error="+err2+" and power="+pow);
|
||||
Assert.assertTrue(MathUtils.compareDoubles(pow,0.2173,1e-4) == 0);
|
||||
|
||||
}*/
|
||||
}
|
||||
Loading…
Reference in New Issue