Officially fixed the UG priors; updated the default min MQ/BQs to pipeline values of q20 and min calling threshold to Q50
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4431 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c6668bd49c
commit
b5e148140b
|
|
@ -39,7 +39,9 @@ import java.util.*;
|
|||
|
||||
public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalculationModel {
|
||||
|
||||
protected DiploidGenotypeCalculationModel() {}
|
||||
protected DiploidGenotypeCalculationModel(int N, double heterozygosity) {
|
||||
super(N, heterozygosity);
|
||||
}
|
||||
|
||||
// the GenotypeLikelihoods map
|
||||
private HashMap<String, GenotypeLikelihoods> GLs = new HashMap<String, GenotypeLikelihoods>();
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ public class GenotypeCalculationModelFactory {
|
|||
GenotypeCalculationModel gcm;
|
||||
switch ( UAC.genotypeModel ) {
|
||||
case JOINT_ESTIMATE:
|
||||
gcm = new DiploidGenotypeCalculationModel();
|
||||
gcm = new DiploidGenotypeCalculationModel(samples.size(), UAC.heterozygosity);
|
||||
break;
|
||||
case DINDEL:
|
||||
throw new UnsupportedOperationException("The Dindel-based genotype likelihoods model is not currently supported");
|
||||
|
|
|
|||
|
|
@ -19,15 +19,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
protected static final double VALUE_NOT_CALCULATED = -1.0 * Double.MAX_VALUE;
|
||||
private int minAlleleFrequencyToTest;
|
||||
|
||||
// because the null allele frequencies are constant for a given N,
|
||||
// we cache the results to avoid having to recompute everything
|
||||
private HashMap<Integer, double[]> nullAlleleFrequencyCache = new HashMap<Integer, double[]>();
|
||||
|
||||
// because the Hardy-Weinberg values for a given frequency are constant,
|
||||
// we cache the results to avoid having to recompute everything
|
||||
// private HashMap<Double, double[]> hardyWeinbergValueCache = new HashMap<Double, double[]>();
|
||||
|
||||
// the allele frequency priors
|
||||
// because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything
|
||||
protected double[] log10AlleleFrequencyPriors;
|
||||
|
||||
// the allele frequency posteriors and P(f>0) for each alternate allele
|
||||
|
|
@ -45,8 +37,9 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
protected static final Set<String> filter = new HashSet<String>(1);
|
||||
|
||||
|
||||
protected JointEstimateGenotypeCalculationModel() {
|
||||
protected JointEstimateGenotypeCalculationModel(int N, double heterozygosity) {
|
||||
filter.add(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME);
|
||||
computeAlleleFrequencyPriors(N, heterozygosity);
|
||||
}
|
||||
|
||||
public VariantCallContext callExtendedLocus(RefMetaDataTracker tracker, byte[] ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> stratifiedContexts) {
|
||||
|
|
@ -79,8 +72,6 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
}
|
||||
|
||||
// calculate likelihoods if there are non-ref bases
|
||||
initializeAlleleFrequencies(frequencyEstimationPoints);
|
||||
|
||||
initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
|
||||
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
|
||||
calculatePofFs(ref, frequencyEstimationPoints);
|
||||
|
|
@ -142,41 +133,20 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
return;
|
||||
}
|
||||
|
||||
protected void initializeAlleleFrequencies(int frequencyEstimationPoints) {
|
||||
// set up the allele frequency priors
|
||||
log10AlleleFrequencyPriors = getNullAlleleFrequencyPriors(frequencyEstimationPoints);
|
||||
}
|
||||
protected void computeAlleleFrequencyPriors(int N, double heterozygosity) {
|
||||
int AFs = (2 * N) + 1;
|
||||
log10AlleleFrequencyPriors = new double[AFs];
|
||||
|
||||
protected double[] getNullAlleleFrequencyPriors(int N) {
|
||||
double[] AFs = nullAlleleFrequencyCache.get(N);
|
||||
|
||||
// if it hasn't been calculated yet, do so now
|
||||
if ( AFs == null ) {
|
||||
|
||||
// calculate sum(1/i)
|
||||
double sigma_1_over_I = 0.0;
|
||||
for (int i = 1; i < N; i++)
|
||||
sigma_1_over_I += 1.0 / (double)i;
|
||||
|
||||
// delta = theta / sum(1/i)
|
||||
double delta = UAC.heterozygosity / sigma_1_over_I;
|
||||
|
||||
// calculate the null allele frequencies for 1-N
|
||||
AFs = new double[N];
|
||||
double sum = 0.0;
|
||||
for (int i = 1; i < N; i++) {
|
||||
double value = delta / (double)i;
|
||||
AFs[i] = Math.log10(value);
|
||||
sum += value;
|
||||
}
|
||||
|
||||
// null frequency for AF=0 is (1 - sum(all other frequencies))
|
||||
AFs[0] = Math.log10(1.0 - sum);
|
||||
|
||||
nullAlleleFrequencyCache.put(N, AFs);
|
||||
// calculate the allele frequency priors for 1-N
|
||||
double sum = 0.0;
|
||||
for (int i = 1; i < AFs; i++) {
|
||||
double value = heterozygosity / (double)i;
|
||||
log10AlleleFrequencyPriors[i] = Math.log10(value);
|
||||
sum += value;
|
||||
}
|
||||
|
||||
return AFs;
|
||||
// null frequency for AF=0 is (1 - sum(all other frequencies))
|
||||
log10AlleleFrequencyPriors[0] = Math.log10(1.0 - sum);
|
||||
}
|
||||
|
||||
private void estimateReferenceConfidence(VariantCallContext vcc, Map<String, StratifiedAlignmentContext> contexts, double theta, boolean ignoreCoveredSamples) {
|
||||
|
|
|
|||
|
|
@ -49,16 +49,16 @@ public class UnifiedArgumentCollection {
|
|||
public boolean ALL_BASES_MODE = false;
|
||||
|
||||
@Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called", required = false)
|
||||
public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0;
|
||||
public double STANDARD_CONFIDENCE_FOR_CALLING = 50.0;
|
||||
|
||||
@Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)", required = false)
|
||||
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
|
||||
public double STANDARD_CONFIDENCE_FOR_EMITTING = 50.0;
|
||||
|
||||
@Argument(fullName = "trigger_min_confidence_threshold_for_calling", shortName = "trig_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants at 'trigger' track sites should be called", required = false)
|
||||
public double TRIGGER_CONFIDENCE_FOR_CALLING = 30.0;
|
||||
public double TRIGGER_CONFIDENCE_FOR_CALLING = 50.0;
|
||||
|
||||
@Argument(fullName = "trigger_min_confidence_threshold_for_emitting", shortName = "trig_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)", required = false)
|
||||
public double TRIGGER_CONFIDENCE_FOR_EMITTING = 30.0;
|
||||
public double TRIGGER_CONFIDENCE_FOR_EMITTING = 50.0;
|
||||
|
||||
@Argument(fullName = "noSLOD", shortName = "nsl", doc = "If provided, we will not calculate the SLOD", required = false)
|
||||
public boolean NO_SLOD = false;
|
||||
|
|
@ -76,10 +76,10 @@ public class UnifiedArgumentCollection {
|
|||
|
||||
// control the various parameters to be used
|
||||
@Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
|
||||
public int MIN_BASE_QUALTY_SCORE = 10;
|
||||
public int MIN_BASE_QUALTY_SCORE = 20;
|
||||
|
||||
@Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false)
|
||||
public int MIN_MAPPING_QUALTY_SCORE = 10;
|
||||
public int MIN_MAPPING_QUALTY_SCORE = 20;
|
||||
|
||||
@Argument(fullName = "max_mismatches_in_40bp_window", shortName = "mm40", doc = "Maximum number of mismatches within a 40 bp window (20bp on either side) around the target position for a read to be used for calling", required = false)
|
||||
public int MAX_MISMATCHES = 3;
|
||||
|
|
|
|||
|
|
@ -52,16 +52,16 @@ public class UnifiedArgumentCollection {
|
|||
public boolean ALL_BASES_MODE = false;
|
||||
|
||||
@Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called", required = false)
|
||||
public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0;
|
||||
public double STANDARD_CONFIDENCE_FOR_CALLING = 50.0;
|
||||
|
||||
@Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)", required = false)
|
||||
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
|
||||
public double STANDARD_CONFIDENCE_FOR_EMITTING = 50.0;
|
||||
|
||||
@Argument(fullName = "trigger_min_confidence_threshold_for_calling", shortName = "trig_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants at 'trigger' track sites should be called", required = false)
|
||||
public double TRIGGER_CONFIDENCE_FOR_CALLING = 30.0;
|
||||
public double TRIGGER_CONFIDENCE_FOR_CALLING = 50.0;
|
||||
|
||||
@Argument(fullName = "trigger_min_confidence_threshold_for_emitting", shortName = "trig_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)", required = false)
|
||||
public double TRIGGER_CONFIDENCE_FOR_EMITTING = 30.0;
|
||||
public double TRIGGER_CONFIDENCE_FOR_EMITTING = 50.0;
|
||||
|
||||
@Argument(fullName = "noSLOD", shortName = "nsl", doc = "If provided, we will not calculate the SLOD", required = false)
|
||||
public boolean NO_SLOD = false;
|
||||
|
|
@ -79,10 +79,10 @@ public class UnifiedArgumentCollection {
|
|||
|
||||
// control the various parameters to be used
|
||||
@Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
|
||||
public int MIN_BASE_QUALTY_SCORE = 10;
|
||||
public int MIN_BASE_QUALTY_SCORE = 20;
|
||||
|
||||
@Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false)
|
||||
public int MIN_MAPPING_QUALTY_SCORE = 10;
|
||||
public int MIN_MAPPING_QUALTY_SCORE = 20;
|
||||
|
||||
@Argument(fullName = "max_mismatches_in_40bp_window", shortName = "mm40", doc = "Maximum number of mismatches within a 40 bp window (20bp on either side) around the target position for a read to be used for calling", required = false)
|
||||
public int MAX_MISMATCHES = 3;
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ public class
|
|||
public void testMultiSamplePilot1Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("42f589f8743ec16e72a4697c728502ed"));
|
||||
Arrays.asList("9870dd089d0f76c8dd3e74bc9f2ba0f0"));
|
||||
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -33,7 +33,7 @@ public class
|
|||
public void testMultiSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("32ba9f34185a0aec3107efafe5130556"));
|
||||
Arrays.asList("eeed5d52f4bdeed2c73ad5d202cbba26"));
|
||||
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -41,7 +41,7 @@ public class
|
|||
public void testSingleSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("3d9de73b764a55deac6a956c56c46373"));
|
||||
Arrays.asList("c34a080e6799ef84f73c6e8d16f7d6ea"));
|
||||
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -53,7 +53,7 @@ public class
|
|||
|
||||
@Test
|
||||
public void testParallelization() {
|
||||
String md5 = "6fdddf70e8320e04dba50a9ed0f26854";
|
||||
String md5 = "ce616a641a2e00d29510e99c6606d151";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||
|
|
@ -80,11 +80,11 @@ public class
|
|||
@Test
|
||||
public void testParameter() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "-genotype", "8428439ee41ea0e024fcfe1c267b5e2d" );
|
||||
e.put( "-all_bases", "b8fd0a213362743b174bd2aeba7d0f8c" );
|
||||
e.put( "--min_base_quality_score 26", "be88f3992f7da095e788ff372bf94190" );
|
||||
e.put( "--min_mapping_quality_score 26", "266e9f95bd577a3b8ebdb5d66925a74d" );
|
||||
e.put( "--max_mismatches_in_40bp_window 5", "61d7de457fd591344a7f8561aeb816a0" );
|
||||
e.put( "-genotype", "0bcea27abc956352d18ae2de23dcaa9c" );
|
||||
e.put( "-all_bases", "aab0acf3e7f494a57cb2ad673ae8d32e" );
|
||||
e.put( "--min_base_quality_score 26", "48837ae22a75002d448806f6b46dd3b3" );
|
||||
e.put( "--min_mapping_quality_score 26", "d388e847b655abf56a7fd945bdff5dee" );
|
||||
e.put( "--max_mismatches_in_40bp_window 5", "416ba33cf1cfc0b4b4a2ad3256c8b56b" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -98,12 +98,12 @@ public class
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("758555ebda5a145e21edd3cba5193817"));
|
||||
Arrays.asList("aedfce6b54d3138e0459f62421185760"));
|
||||
executeTest("testConfidence1", spec1);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
||||
Arrays.asList("291e2b7d0f8c3b262306c18d61782711"));
|
||||
Arrays.asList("b442c0fd6e0d3365e8bc2d9149eeb6f5"));
|
||||
executeTest("testConfidence2", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -115,8 +115,8 @@ public class
|
|||
@Test
|
||||
public void testHeterozyosity() {
|
||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||
e.put( 0.01, "61a54794bc262479a981b6eb83ce6243" );
|
||||
e.put( 1.0 / 1850, "b34d30518143ea407e6570eff101f708" );
|
||||
e.put( 0.01, "2588b41cd3debfc2d58fdf6b463bcc07" );
|
||||
e.put( 1.0 / 1850, "18fa98796906658402cedd6cc3f28be6" );
|
||||
|
||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -135,8 +135,8 @@ public class
|
|||
@Test
|
||||
public void testOtherBaseCallModel() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "one_state", "97d22192536b1efe0163c062ecf64612" );
|
||||
e.put( "three_state", "fd48388392b877afcfa28507067fbe63" );
|
||||
e.put( "one_state", "62ee552b32d9193b700c1dd8e3eeb4b9" );
|
||||
e.put( "three_state", "f2971d0964780b9abfd38e860080c342" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -159,7 +159,7 @@ public class
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("bb112d6f907608a0e8de37cd0c887956"));
|
||||
Arrays.asList("4469242939216eac5eb6b6b73ae6a509"));
|
||||
|
||||
executeTest(String.format("testMultiTechnologies"), spec);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue