Added command line argument for the max value of the allele count prior in VariantRecalibrator (--max_ac_prior). Default value increased to 0.99 from 0.95.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4436 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-10-06 14:00:53 +00:00
parent c56c2641a8
commit 69485d6a7a
3 changed files with 10 additions and 8 deletions

View File

@ -256,8 +256,8 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
this.singletonFPRate = rate;
}
public final double getAlleleCountPrior( final int alleleCount ) {
return Math.min(0.95, 1.0 - Math.pow(singletonFPRate, alleleCount)); //TODO -- define the vals
public final double getAlleleCountPrior( final int alleleCount, final double maxProbability ) {
return Math.min( maxProbability, 1.0 - Math.pow(singletonFPRate, alleleCount) );
}
private void initializeUsingKMeans( final VariantDatum[] data ) {

View File

@ -79,7 +79,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
/////////////////////////////
@Argument(fullName="target_titv", shortName="titv", doc="The expected novel Ti/Tv ratio to use when calculating FDR tranches and for display on optimization curve output figures. (~~2.07 for whole genome experiments)", required=true)
private double TARGET_TITV = 2.07;
@Argument(fullName="backOff", shortName="backOff", doc="The Gaussian back off factor, used to prevent overfitting by enlarging out the Gaussians.", required=false)
@Argument(fullName="backOff", shortName="backOff", doc="The Gaussian back off factor, used to prevent overfitting by enlarging the Gaussians.", required=false)
private double BACKOFF_FACTOR = 1.3;
@Argument(fullName="desired_num_variants", shortName="dV", doc="The desired number of variants to keep in a theoretically filtered set", required=false)
private int DESIRED_NUM_VARIANTS = 0;
@ -105,6 +105,8 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
private double QUAL_STEP = 0.1;
@Argument(fullName="singleton_fp_rate", shortName="fp_rate", doc="Prior expectation that a singleton call would be a FP", required=false)
private double SINGLETON_FP_RATE = 0.5;
@Argument(fullName="max_ac_prior", shortName="maxACPrior", doc="Maximum value for the prior expectation based on allele count. Needed because (1 - 0.5^x) approaches 1.0 very quickly.", required=false)
private double MAX_AC_PRIOR = 0.99;
@Argument(fullName="quality_scale_factor", shortName="qScale", doc="Multiply all final quality scores by this value. Needed to normalize the quality scores.", required=false)
private double QUALITY_SCALE_FACTOR = 100.0;
@Argument(fullName="dontTrustACField", shortName="dontTrustACField", doc="If specified the VR will not use the AC field and will instead always parse the genotypes to figure out how many variant chromosomes there are at a given site.", required=false)
@ -268,7 +270,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
// If this prior factor hasn't been calculated before, do so now
if(priorLodFactor == null) {
final double knownPrior = QualityUtils.qualToProb(knownPrior_qScore);
final double acPrior = theModel.getAlleleCountPrior( alleleCount );
final double acPrior = theModel.getAlleleCountPrior( alleleCount, MAX_AC_PRIOR );
final double totalPrior = 1.0 - ((1.0 - acPrior) * (1.0 - knownPrior));
if( MathUtils.compareDoubles(totalPrior, 1.0, 1E-8) == 0 || MathUtils.compareDoubles(totalPrior, 0.0, 1E-8) == 0 ) {

View File

@ -46,9 +46,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
public void testVariantRecalibrator() {
HashMap<String, List<String>> e = new HashMap<String, List<String>>();
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf",
Arrays.asList("e94b02016e6f7936999f02979b801c30", "038c31c5bb46a4df89b8ee69ec740812","7d42bbdfb69fdfb18cbda13a63d92602")); // Each test checks the md5 of three output files
Arrays.asList("ca80c95e47d22a79eb0700c039457aca", "ab6b91a3f97b682817d2cc068c34c317","a4802dbf883138ae1152109be990fb9d")); // Each test checks the md5 of three output files
e.put( validationDataLocation + "lowpass.N3.chr1.raw.vcf",
Arrays.asList("bbdffb7fa611f4ae80e919cdf86b9bc6", "661360e85392af9c97e386399871854a","371e5a70a4006420737c5ab259e0e23e")); // Each test checks the md5 of three output files
Arrays.asList("a40d3b1dd7bfbb66a52145600f87d744", "e2c89c74debc1c202c56060b77575dff","8e8b55b521aaba1c44169a19e3ff2355")); // Each test checks the md5 of three output files
for ( Map.Entry<String, List<String>> entry : e.entrySet() ) {
String vcf = entry.getKey();
@ -84,8 +84,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testApplyVariantCuts() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "e06aa6b734cc3c881d95cf5ee9315664" );
e.put( validationDataLocation + "lowpass.N3.chr1.raw.vcf", "ad8661cba3b04a7977c97a541fd8a668" );
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "ece0c15c34926fc585e12503f6ce6271" );
e.put( validationDataLocation + "lowpass.N3.chr1.raw.vcf", "994b329a35d01e9564f5581cf3d9feac" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String vcf = entry.getKey();