diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java index c80d8c3d9..672689eab 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -16,14 +16,6 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul private enum GenotypeType { REF, HET, HOM } - protected HashMap createContexts(AlignmentContext context) { - return splitContextBySample(context); - } - - protected int getNSamples(HashMap contexts) { - return contexts.size(); - } - protected void initialize(char ref, HashMap contexts, StratifiedContext contextType) { // initialize the GenotypeLikelihoods GLs.clear(); @@ -63,17 +55,22 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul } } - protected double computeLog10PofDgivenAFi(char ref, char alt, double f, HashMap contexts, StratifiedContext contextType) { - - // *** note that this code assumes that allele frequencies are passed in IN ORDER from 0 to 2N + protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int numFrequencies, HashMap contexts, StratifiedContext contextType) { AlleleFrequencyMatrix matrix = AFMatrixMap.get(alt); + int baseIndex = BaseUtils.simpleBaseToBaseIndex(alt); - // for any frequency other than zero, calculate the next greedy entry - if ( MathUtils.compareDoubles(f, 0.0) != 0 ) + // first, calculate for AF=0 (no change to matrix) + log10PofDgivenAFi[baseIndex][0] = matrix.getLikelihoodsOfFrequency(); + + // for each minor allele frequency, calculate log10PofDgivenAFi + for (int i = 1; i <= numFrequencies; i++) { + // add one more alternatr allele matrix.incrementFrequency(); - - return matrix.getLikelihoodsOfFrequency(); + + // calculate new likelihoods + log10PofDgivenAFi[baseIndex][i] = matrix.getLikelihoodsOfFrequency(); + } } protected List makeGenotypeCalls(char ref, char alt, HashMap contexts, GenomeLoc loc) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index 58b0b3431..389c2a56f 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -28,15 +28,6 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc protected double[] PofFs = new double[BaseUtils.BASES.length]; - // The abstract methods which need to be implemented: - // 1. How many samples are being evaluated? - protected abstract int getNSamples(HashMap contexts); - // 2. Create stratified contexts from the original alignment context - protected abstract HashMap createContexts(AlignmentContext context); - // 3. The likelihoods calculation underlying this whole model - protected abstract double computeLog10PofDgivenAFi(char ref, char alt, double f, HashMap contexts, StratifiedContext contextType); - - protected JointEstimateGenotypeCalculationModel() {} public Pair, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { @@ -62,6 +53,14 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc return createCalls(tracker, ref, contexts, context.getLocation(), frequencyEstimationPoints); } + protected HashMap createContexts(AlignmentContext context) { + return splitContextBySample(context); + } + + protected int getNSamples(HashMap contexts) { + return contexts.size(); + } + protected void initializeVerboseWriter(PrintWriter verboseWriter) { StringBuilder header = new StringBuilder("AFINFO\tLOC\tMAF\tF\tNullAFpriors\t"); for ( char altAllele : BaseUtils.BASES ) { @@ -128,16 +127,52 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc if ( altAllele == ref ) continue; - int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele); - - // for each minor allele frequency - for (int i = 0; i < frequencyEstimationPoints; i++) { - double f = (double)i / (double)(frequencyEstimationPoints-1); - log10PofDgivenAFi[baseIndex][i] += computeLog10PofDgivenAFi(ref, altAllele, f, contexts, contextType); - } + calculatelog10PofDgivenAFforAllF(ref, altAllele, frequencyEstimationPoints-1, contexts, contextType); } } + /********************************************************************************/ + /*** One or both of the following methods should be overloaded in subclasses ***/ + /*** so that the correct calculation is made for PofDgivenAFi ***/ + /********************************************************************************/ + + /** + * @param ref the ref base + * @param alt the alt base + * @param numFrequencies total number of allele frequencies (2N) + * @param contexts stratified alignment contexts + * @param contextType which stratification to use + */ + protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int numFrequencies, HashMap contexts, StratifiedContext contextType) { + int baseIndex = BaseUtils.simpleBaseToBaseIndex(alt); + + // for each minor allele frequency, calculate log10PofDgivenAFi + for (int i = 0; i <= numFrequencies; i++) { + double f = (double)i / (double)(numFrequencies); + log10PofDgivenAFi[baseIndex][i] += calculateLog10PofDgivenAFforF(ref, alt, f, contexts, contextType); + } + } + + /** + * @param ref the ref base + * @param alt the alt base + * @param f the allele frequency + * @param contexts stratified alignment contexts + * @param contextType which stratification to use + * + * @return value of PofDgivenAF for allele frequency f + */ + protected double calculateLog10PofDgivenAFforF(char ref, char alt, double f, HashMap contexts, StratifiedContext contextType) { + return 0.0; + } + + /********************************************************************************/ + + + /** + * @param ref the ref base + * @param frequencyEstimationPoints number of allele frequencies + */ protected void calculatePofFs(char ref, int frequencyEstimationPoints) { // for each alternate allele for ( char altAllele : BaseUtils.BASES ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java index c1faa923c..119ace22e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java @@ -72,17 +72,13 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode return contexts; } - protected double computeLog10PofDgivenAFi(char refArg, char altArg, double f, HashMap contexts, StratifiedContext contextType) { + protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int nChromosomes, HashMap contexts, StratifiedContext contextType) { + AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME); - ReadBackedPileup pileup = new ReadBackedPileup(refArg, context.getContext(contextType)); + ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType)); - double log10L = 0.0; - - int refIndex = BaseUtils.simpleBaseToBaseIndex(refArg); - int altIndex = BaseUtils.simpleBaseToBaseIndex(altArg); - - int nChromosomes = 2 * getNSamples(contexts); - int nAltAlleles = (int)(f * nChromosomes); + int refIndex = BaseUtils.simpleBaseToBaseIndex(ref); + int altIndex = BaseUtils.simpleBaseToBaseIndex(alt); for (int i = 0; i < pileup.getReads().size(); i++) { int offset = pileup.getOffsets().get(i); @@ -97,11 +93,12 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode byte qual = read.getBaseQualities()[offset]; if ( qual > 0 && bIndex != -1 ) { - log10L += calcPBGivenH(refIndex, altIndex, nAltAlleles, nChromosomes, base, qual, read, offset); + + // for each minor allele frequency, calculate log10PofDgivenAFi + for (int frequency = 0; frequency <= nChromosomes; frequency++) + log10PofDgivenAFi[altIndex][frequency] += calcPBGivenH(refIndex, altIndex, frequency, nChromosomes, base, qual, read, offset); } } - - return log10L; } // cache = BMM x PL x REF x ALT x base x QUAL x strand x F as i diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 2abdee1ed..edaa0e6a3 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -59,6 +59,19 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { executeTest("testMultiSamplePilot2 - Point Estimate EM", spec); } + // -------------------------------------------------------------------------------------------------------------- + // + // testing pooled model + // + // -------------------------------------------------------------------------------------------------------------- + @Test + public void testPooled1() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1, + Arrays.asList("80fcd214108b4f4145ae585e5f119460")); + executeTest("testPooled1", spec); + } + // -------------------------------------------------------------------------------------------------------------- // // testing joint estimation model @@ -67,24 +80,24 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testMultiSamplePilot1Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("aa4a5c8ae775f8e3484d50424900370e")); + "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, + Arrays.asList("e1a206a49982f7db5c3f4b65aa910b3a")); executeTest("testMultiSamplePilot1 - Joint Estimate", spec); } @Test public void testMultiSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("c262ec6b341c854a24d082cee2c8e318")); + "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, + Arrays.asList("cec6081d89ada9b2924fe38c7a021921")); executeTest("testMultiSamplePilot2 - Joint Estimate", spec); } @Test public void testSingleSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,067,000-10,083,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("368217e47a34bce2dac89a9786675104")); + "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, + Arrays.asList("0cd57b2b6272202db0eca45376fdb01d")); executeTest("testSingleSamplePilot2 - Joint Estimate", spec); }