- Smarter strand bias calculation
- Better debug/verbose printing git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2639 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
2261f57e5c
commit
4ac9eb7cb2
|
|
@ -26,6 +26,8 @@ public class Allele {
|
|||
throw new IllegalArgumentException("Constructor: the Allele base string cannot be null");
|
||||
if ( type == AlleleType.DELETION && bases.length() > 0 )
|
||||
throw new IllegalArgumentException("Constructor: deletions cannot have observed bases");
|
||||
if ( (type == AlleleType.REFERENCE || type == AlleleType.SNP) && bases.length() > 1 )
|
||||
throw new IllegalArgumentException("Constructor: point alleles cannot have more than one observed base");
|
||||
this.bases = bases.toUpperCase();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -183,7 +183,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
throw new StingException("Frequency was incremented past N; how is this possible?");
|
||||
frequency++;
|
||||
|
||||
double greedy = -1.0 * Double.MAX_VALUE;
|
||||
double greedy = VALUE_NOT_CALCULATED;
|
||||
int greedyIndex = -1;
|
||||
for (int i = 0; i < N; i++) {
|
||||
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
// for use in optimizing the P(D|AF) calculations:
|
||||
// how much off from the max likelihoods do we need to be before we can quit calculating?
|
||||
protected static final Double LOG10_OPTIMIZATION_EPSILON = 8.0;
|
||||
protected static final Double VALUE_NOT_CALCULATED = -1.0 * Double.MAX_VALUE;
|
||||
|
||||
// because the null allele frequencies are constant for a given N,
|
||||
// we cache the results to avoid having to recompute everything
|
||||
|
|
@ -205,7 +206,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
*/
|
||||
protected void ignoreAlleleFrequenciesAboveI(int freqI, int numFrequencies, int altBaseIndex) {
|
||||
while ( ++freqI <= numFrequencies )
|
||||
log10PofDgivenAFi[altBaseIndex][freqI] = -1.0 * Double.MAX_VALUE;
|
||||
log10PofDgivenAFi[altBaseIndex][freqI] = VALUE_NOT_CALCULATED;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -243,7 +244,10 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
for ( char altAllele : BaseUtils.BASES ) {
|
||||
if ( altAllele != ref ) {
|
||||
int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele);
|
||||
AFline.append(String.format("%.8f\t%.8f\t", log10PofDgivenAFi[baseIndex][i], alleleFrequencyPosteriors[baseIndex][i]));
|
||||
double PofDgivenAF = log10PofDgivenAFi[baseIndex][i];
|
||||
if ( PofDgivenAF == VALUE_NOT_CALCULATED )
|
||||
PofDgivenAF = 0.0;
|
||||
AFline.append(String.format("%.8f\t%.8f\t", PofDgivenAF, alleleFrequencyPosteriors[baseIndex][i]));
|
||||
} else {
|
||||
AFline.append(String.format("%.8f\t%.8f\t", -1.0, -1.0));
|
||||
}
|
||||
|
|
@ -263,7 +267,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
verboseWriter.println("Qscore_" + base + " = " + phredScaledConfidence);
|
||||
verboseWriter.println("LOD_" + base + " = " + phredScaledConfidence);
|
||||
} else {
|
||||
verboseWriter.println("P(f>0)_" + base + " = " + Math.log10(PofFs[baseIndex]));
|
||||
verboseWriter.println("P(f>0)_" + base + " = " + PofFs[baseIndex]);
|
||||
verboseWriter.println("Qscore_" + base + " = " + (QualityUtils.phredScaleErrorRate(alleleFrequencyPosteriors[baseIndex][0])));
|
||||
verboseWriter.println("LOD_" + base + " = " + (Math.log10(PofFs[baseIndex]) - Math.log10(alleleFrequencyPosteriors[baseIndex][0])));
|
||||
}
|
||||
|
|
@ -337,42 +341,35 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
}
|
||||
if ( locusdata instanceof SLODBacked && REPORT_SLOD ) {
|
||||
// the overall lod
|
||||
double overallLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
|
||||
double overallLog10PofF = Math.log10(PofFs[indexOfMax]);
|
||||
double overallLog10PofNull = log10AlleleFrequencyPriors[0] + log10PofDgivenAFi[indexOfMax][0];
|
||||
double overallLog10PofF = log10AlleleFrequencyPriors[bestAFguess] + log10PofDgivenAFi[indexOfMax][bestAFguess];
|
||||
double lod = overallLog10PofF - overallLog10PofNull;
|
||||
|
||||
// I'm not sure what to do when the numbers are so small as to make the lod infinite.
|
||||
// For now, we'll just not compute strand bias...
|
||||
if ( !Double.isInfinite(lod) ) {
|
||||
// the forward lod
|
||||
initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD);
|
||||
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD);
|
||||
calculatePofFs(ref, frequencyEstimationPoints);
|
||||
double forwardLog10PofNull = log10AlleleFrequencyPriors[0] + log10PofDgivenAFi[indexOfMax][0];
|
||||
double forwardLog10PofF = log10AlleleFrequencyPriors[bestAFguess] + log10PofDgivenAFi[indexOfMax][bestAFguess];
|
||||
|
||||
// the forward lod
|
||||
initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD);
|
||||
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD);
|
||||
calculatePofFs(ref, frequencyEstimationPoints);
|
||||
double forwardLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
|
||||
double forwardLog10PofF = Math.log10(PofFs[indexOfMax]);
|
||||
if ( Double.isInfinite(lod) )
|
||||
lod = -1.0 * log10PofDgivenAFi[indexOfMax][0];
|
||||
// the reverse lod
|
||||
initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE);
|
||||
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE);
|
||||
calculatePofFs(ref, frequencyEstimationPoints);
|
||||
double reverseLog10PofNull = log10AlleleFrequencyPriors[0] + log10PofDgivenAFi[indexOfMax][0];
|
||||
double reverseLog10PofF = log10AlleleFrequencyPriors[bestAFguess] + log10PofDgivenAFi[indexOfMax][bestAFguess];
|
||||
|
||||
// the reverse lod
|
||||
initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE);
|
||||
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE);
|
||||
calculatePofFs(ref, frequencyEstimationPoints);
|
||||
double reverseLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
|
||||
double reverseLog10PofF = Math.log10(PofFs[indexOfMax]);
|
||||
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofNull;
|
||||
double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofNull;
|
||||
//logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
|
||||
|
||||
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofNull;
|
||||
double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofNull;
|
||||
//logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
|
||||
// strand score is max bias between forward and reverse strands
|
||||
double strandScore = Math.max(forwardLod - lod, reverseLod - lod);
|
||||
// rescale by a factor of 10
|
||||
strandScore *= 10.0;
|
||||
//logger.debug(String.format("SLOD=%f", strandScore));
|
||||
|
||||
// strand score is max bias between forward and reverse strands
|
||||
double strandScore = Math.max(forwardLod - lod, reverseLod - lod);
|
||||
// rescale by a factor of 10
|
||||
strandScore *= 10.0;
|
||||
//logger.debug(String.format("SLOD=%f", strandScore));
|
||||
|
||||
((SLODBacked)locusdata).setSLOD(strandScore);
|
||||
}
|
||||
((SLODBacked)locusdata).setSLOD(strandScore);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -132,7 +132,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
if(UAC.genotypeModel != GenotypeCalculationModel.Model.EM_POINT_ESTIMATE) {
|
||||
StringBuilder header = new StringBuilder("AFINFO\tLOC\tMAF\tF\tNullAFpriors\t");
|
||||
for ( char altAllele : BaseUtils.BASES ) {
|
||||
char base = Character.toLowerCase(altAllele);
|
||||
char base = Character.toUpperCase(altAllele);
|
||||
header.append("POfDGivenAFFor" + base + "\t");
|
||||
header.append("PosteriorAFFor" + base + "\t");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testPooled1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1,
|
||||
Arrays.asList("e344fa16b155be00bc4d9b8cc3221187"));
|
||||
Arrays.asList("f2d536b48688a2c12a24f0708d02ff94"));
|
||||
executeTest("testPooled1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -56,7 +56,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("6f85a0d0134675be6fe8d3af9e9f7467"));
|
||||
Arrays.asList("25d1a6c7709421ff25187d951c6a66fe"));
|
||||
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -64,7 +64,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("4a7e44e834d100b143e52beffdd79d1f"));
|
||||
Arrays.asList("791e11574c122be4733d8d446d557385"));
|
||||
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -72,7 +72,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("d3adfbae8c9d18851cab13659239bc03"));
|
||||
Arrays.asList("ca042c068e64c298a2718926a900ed93"));
|
||||
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -85,7 +85,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testParallelization() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,400,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30 -nt 4", 1,
|
||||
Arrays.asList("a57dabbe7dd12015759de72fef8bc879"));
|
||||
Arrays.asList("f935efcce647ba36e9ada8d4d9f6134c"));
|
||||
executeTest("test parallelization", spec);
|
||||
}
|
||||
|
||||
|
|
@ -98,11 +98,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testParameter() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "-genotype", "65abc7cbf940f89037ec61b764054060" );
|
||||
e.put( "-all_bases", "ff63408d7b5aeb90bc85e471d9de1390" );
|
||||
e.put( "--min_base_quality_score 26", "287858958aa8cf3be8b43c12cb317ebc" );
|
||||
e.put( "--min_mapping_quality_score 26", "4e70544703309610e70164db100c66b7" );
|
||||
e.put( "--max_mismatches_in_40bp_window 5", "14e17c27b6559f9b98d91842c271db3f" );
|
||||
e.put( "-genotype", "5200cb9c80228e9824cb92987d665756" );
|
||||
e.put( "-all_bases", "ebb2179d84fbdb1ae1e0945707bfd9e5" );
|
||||
e.put( "--min_base_quality_score 26", "0f20580897553804d32e7d52498e1fe0" );
|
||||
e.put( "--min_mapping_quality_score 26", "8966d53d86d9ba5762b0ce45ef5f7db3" );
|
||||
e.put( "--max_mismatches_in_40bp_window 5", "74fc45099520f84e5d185bddb025b1d7" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -116,7 +116,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 10 ", 1,
|
||||
Arrays.asList("ae98a5e96af774514eac5dea31078e0c"));
|
||||
Arrays.asList("ed3590332d862f160583e00ae7125ade"));
|
||||
executeTest("testConfidence", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue