diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/Allele.java b/java/src/org/broadinstitute/sting/gatk/refdata/Allele.java index 4c00af667..5e34aaa5a 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/Allele.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/Allele.java @@ -26,6 +26,8 @@ public class Allele { throw new IllegalArgumentException("Constructor: the Allele base string cannot be null"); if ( type == AlleleType.DELETION && bases.length() > 0 ) throw new IllegalArgumentException("Constructor: deletions cannot have observed bases"); + if ( (type == AlleleType.REFERENCE || type == AlleleType.SNP) && bases.length() > 1 ) + throw new IllegalArgumentException("Constructor: point alleles cannot have more than one observed base"); this.bases = bases.toUpperCase(); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java index e344489dd..cb050587a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -183,7 +183,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul throw new StingException("Frequency was incremented past N; how is this possible?"); frequency++; - double greedy = -1.0 * Double.MAX_VALUE; + double greedy = VALUE_NOT_CALCULATED; int greedyIndex = -1; for (int i = 0; i < N; i++) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index e4480385e..80d8fd159 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -15,6 +15,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc // for use in optimizing the P(D|AF) calculations: // how much off from the max likelihoods do we need to be before we can quit calculating? protected static final Double LOG10_OPTIMIZATION_EPSILON = 8.0; + protected static final Double VALUE_NOT_CALCULATED = -1.0 * Double.MAX_VALUE; // because the null allele frequencies are constant for a given N, // we cache the results to avoid having to recompute everything @@ -205,7 +206,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc */ protected void ignoreAlleleFrequenciesAboveI(int freqI, int numFrequencies, int altBaseIndex) { while ( ++freqI <= numFrequencies ) - log10PofDgivenAFi[altBaseIndex][freqI] = -1.0 * Double.MAX_VALUE; + log10PofDgivenAFi[altBaseIndex][freqI] = VALUE_NOT_CALCULATED; } /** @@ -243,7 +244,10 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc for ( char altAllele : BaseUtils.BASES ) { if ( altAllele != ref ) { int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele); - AFline.append(String.format("%.8f\t%.8f\t", log10PofDgivenAFi[baseIndex][i], alleleFrequencyPosteriors[baseIndex][i])); + double PofDgivenAF = log10PofDgivenAFi[baseIndex][i]; + if ( PofDgivenAF == VALUE_NOT_CALCULATED ) + PofDgivenAF = 0.0; + AFline.append(String.format("%.8f\t%.8f\t", PofDgivenAF, alleleFrequencyPosteriors[baseIndex][i])); } else { AFline.append(String.format("%.8f\t%.8f\t", -1.0, -1.0)); } @@ -263,7 +267,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc verboseWriter.println("Qscore_" + base + " = " + phredScaledConfidence); verboseWriter.println("LOD_" + base + " = " + phredScaledConfidence); } else { - verboseWriter.println("P(f>0)_" + base + " = " + Math.log10(PofFs[baseIndex])); + verboseWriter.println("P(f>0)_" + base + " = " + PofFs[baseIndex]); verboseWriter.println("Qscore_" + base + " = " + (QualityUtils.phredScaleErrorRate(alleleFrequencyPosteriors[baseIndex][0]))); verboseWriter.println("LOD_" + base + " = " + (Math.log10(PofFs[baseIndex]) - Math.log10(alleleFrequencyPosteriors[baseIndex][0]))); } @@ -337,42 +341,35 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc } if ( locusdata instanceof SLODBacked && REPORT_SLOD ) { // the overall lod - double overallLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]); - double overallLog10PofF = Math.log10(PofFs[indexOfMax]); + double overallLog10PofNull = log10AlleleFrequencyPriors[0] + log10PofDgivenAFi[indexOfMax][0]; + double overallLog10PofF = log10AlleleFrequencyPriors[bestAFguess] + log10PofDgivenAFi[indexOfMax][bestAFguess]; double lod = overallLog10PofF - overallLog10PofNull; - // I'm not sure what to do when the numbers are so small as to make the lod infinite. - // For now, we'll just not compute strand bias... - if ( !Double.isInfinite(lod) ) { + // the forward lod + initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD); + calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD); + calculatePofFs(ref, frequencyEstimationPoints); + double forwardLog10PofNull = log10AlleleFrequencyPriors[0] + log10PofDgivenAFi[indexOfMax][0]; + double forwardLog10PofF = log10AlleleFrequencyPriors[bestAFguess] + log10PofDgivenAFi[indexOfMax][bestAFguess]; - // the forward lod - initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD); - calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD); - calculatePofFs(ref, frequencyEstimationPoints); - double forwardLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]); - double forwardLog10PofF = Math.log10(PofFs[indexOfMax]); - if ( Double.isInfinite(lod) ) - lod = -1.0 * log10PofDgivenAFi[indexOfMax][0]; + // the reverse lod + initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE); + calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE); + calculatePofFs(ref, frequencyEstimationPoints); + double reverseLog10PofNull = log10AlleleFrequencyPriors[0] + log10PofDgivenAFi[indexOfMax][0]; + double reverseLog10PofF = log10AlleleFrequencyPriors[bestAFguess] + log10PofDgivenAFi[indexOfMax][bestAFguess]; - // the reverse lod - initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE); - calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE); - calculatePofFs(ref, frequencyEstimationPoints); - double reverseLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]); - double reverseLog10PofF = Math.log10(PofFs[indexOfMax]); + double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofNull; + double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofNull; + //logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod); - double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofNull; - double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofNull; - //logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod); + // strand score is max bias between forward and reverse strands + double strandScore = Math.max(forwardLod - lod, reverseLod - lod); + // rescale by a factor of 10 + strandScore *= 10.0; + //logger.debug(String.format("SLOD=%f", strandScore)); - // strand score is max bias between forward and reverse strands - double strandScore = Math.max(forwardLod - lod, reverseLod - lod); - // rescale by a factor of 10 - strandScore *= 10.0; - //logger.debug(String.format("SLOD=%f", strandScore)); - - ((SLODBacked)locusdata).setSLOD(strandScore); - } + ((SLODBacked)locusdata).setSLOD(strandScore); } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index a96eb8037..cae055340 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -132,7 +132,7 @@ public class UnifiedGenotyper extends LocusWalker e = new HashMap(); - e.put( "-genotype", "65abc7cbf940f89037ec61b764054060" ); - e.put( "-all_bases", "ff63408d7b5aeb90bc85e471d9de1390" ); - e.put( "--min_base_quality_score 26", "287858958aa8cf3be8b43c12cb317ebc" ); - e.put( "--min_mapping_quality_score 26", "4e70544703309610e70164db100c66b7" ); - e.put( "--max_mismatches_in_40bp_window 5", "14e17c27b6559f9b98d91842c271db3f" ); + e.put( "-genotype", "5200cb9c80228e9824cb92987d665756" ); + e.put( "-all_bases", "ebb2179d84fbdb1ae1e0945707bfd9e5" ); + e.put( "--min_base_quality_score 26", "0f20580897553804d32e7d52498e1fe0" ); + e.put( "--min_mapping_quality_score 26", "8966d53d86d9ba5762b0ce45ef5f7db3" ); + e.put( "--max_mismatches_in_40bp_window 5", "74fc45099520f84e5d185bddb025b1d7" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -116,7 +116,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 10 ", 1, - Arrays.asList("ae98a5e96af774514eac5dea31078e0c")); + Arrays.asList("ed3590332d862f160583e00ae7125ade")); executeTest("testConfidence", spec); }