Added command line argument for the max value of the allele count prior in VariantRecalibrator (--max_ac_prior). Default value increased to 0.99 from 0.95.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4436 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c56c2641a8
commit
69485d6a7a
|
|
@ -256,8 +256,8 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
|||
this.singletonFPRate = rate;
|
||||
}
|
||||
|
||||
public final double getAlleleCountPrior( final int alleleCount ) {
|
||||
return Math.min(0.95, 1.0 - Math.pow(singletonFPRate, alleleCount)); //TODO -- define the vals
|
||||
public final double getAlleleCountPrior( final int alleleCount, final double maxProbability ) {
|
||||
return Math.min( maxProbability, 1.0 - Math.pow(singletonFPRate, alleleCount) );
|
||||
}
|
||||
|
||||
private void initializeUsingKMeans( final VariantDatum[] data ) {
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
/////////////////////////////
|
||||
@Argument(fullName="target_titv", shortName="titv", doc="The expected novel Ti/Tv ratio to use when calculating FDR tranches and for display on optimization curve output figures. (~2.07 for whole genome experiments)", required=true)
|
||||
private double TARGET_TITV = 2.07;
|
||||
@Argument(fullName="backOff", shortName="backOff", doc="The Gaussian back off factor, used to prevent overfitting by enlarging out the Gaussians.", required=false)
|
||||
@Argument(fullName="backOff", shortName="backOff", doc="The Gaussian back off factor, used to prevent overfitting by enlarging the Gaussians.", required=false)
|
||||
private double BACKOFF_FACTOR = 1.3;
|
||||
@Argument(fullName="desired_num_variants", shortName="dV", doc="The desired number of variants to keep in a theoretically filtered set", required=false)
|
||||
private int DESIRED_NUM_VARIANTS = 0;
|
||||
|
|
@ -105,6 +105,8 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
private double QUAL_STEP = 0.1;
|
||||
@Argument(fullName="singleton_fp_rate", shortName="fp_rate", doc="Prior expectation that a singleton call would be a FP", required=false)
|
||||
private double SINGLETON_FP_RATE = 0.5;
|
||||
@Argument(fullName="max_ac_prior", shortName="maxACPrior", doc="Maximum value for the prior expectation based on allele count. Needed because (1 - 0.5^x) approaches 1.0 very quickly.", required=false)
|
||||
private double MAX_AC_PRIOR = 0.99;
|
||||
@Argument(fullName="quality_scale_factor", shortName="qScale", doc="Multiply all final quality scores by this value. Needed to normalize the quality scores.", required=false)
|
||||
private double QUALITY_SCALE_FACTOR = 100.0;
|
||||
@Argument(fullName="dontTrustACField", shortName="dontTrustACField", doc="If specified the VR will not use the AC field and will instead always parse the genotypes to figure out how many variant chromosomes there are at a given site.", required=false)
|
||||
|
|
@ -268,7 +270,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
// If this prior factor hasn't been calculated before, do so now
|
||||
if(priorLodFactor == null) {
|
||||
final double knownPrior = QualityUtils.qualToProb(knownPrior_qScore);
|
||||
final double acPrior = theModel.getAlleleCountPrior( alleleCount );
|
||||
final double acPrior = theModel.getAlleleCountPrior( alleleCount, MAX_AC_PRIOR );
|
||||
final double totalPrior = 1.0 - ((1.0 - acPrior) * (1.0 - knownPrior));
|
||||
|
||||
if( MathUtils.compareDoubles(totalPrior, 1.0, 1E-8) == 0 || MathUtils.compareDoubles(totalPrior, 0.0, 1E-8) == 0 ) {
|
||||
|
|
|
|||
|
|
@ -46,9 +46,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
public void testVariantRecalibrator() {
|
||||
HashMap<String, List<String>> e = new HashMap<String, List<String>>();
|
||||
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf",
|
||||
Arrays.asList("e94b02016e6f7936999f02979b801c30", "038c31c5bb46a4df89b8ee69ec740812","7d42bbdfb69fdfb18cbda13a63d92602")); // Each test checks the md5 of three output files
|
||||
Arrays.asList("ca80c95e47d22a79eb0700c039457aca", "ab6b91a3f97b682817d2cc068c34c317","a4802dbf883138ae1152109be990fb9d")); // Each test checks the md5 of three output files
|
||||
e.put( validationDataLocation + "lowpass.N3.chr1.raw.vcf",
|
||||
Arrays.asList("bbdffb7fa611f4ae80e919cdf86b9bc6", "661360e85392af9c97e386399871854a","371e5a70a4006420737c5ab259e0e23e")); // Each test checks the md5 of three output files
|
||||
Arrays.asList("a40d3b1dd7bfbb66a52145600f87d744", "e2c89c74debc1c202c56060b77575dff","8e8b55b521aaba1c44169a19e3ff2355")); // Each test checks the md5 of three output files
|
||||
|
||||
for ( Map.Entry<String, List<String>> entry : e.entrySet() ) {
|
||||
String vcf = entry.getKey();
|
||||
|
|
@ -84,8 +84,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testApplyVariantCuts() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "e06aa6b734cc3c881d95cf5ee9315664" );
|
||||
e.put( validationDataLocation + "lowpass.N3.chr1.raw.vcf", "ad8661cba3b04a7977c97a541fd8a668" );
|
||||
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "ece0c15c34926fc585e12503f6ce6271" );
|
||||
e.put( validationDataLocation + "lowpass.N3.chr1.raw.vcf", "994b329a35d01e9564f5581cf3d9feac" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String vcf = entry.getKey();
|
||||
|
|
|
|||
Loading…
Reference in New Issue