diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java index 54197f6f1..4f46c58fe 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -13,6 +13,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul // the GenotypeLikelihoods map private HashMap GLs = new HashMap(); + private enum GenotypeType { REF, HET, HOM } protected HashMap createContexts(AlignmentContext context) { return splitContextBySample(context); @@ -22,7 +23,8 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul return contexts.size(); } - protected void initializeLikelihoods(char ref, HashMap contexts, StratifiedContext contextType) { + protected void initialize(char ref, HashMap contexts, StratifiedContext contextType) { + // initialize the GenotypeLikelihoods GLs.clear(); // use flat priors for GLs @@ -39,7 +41,11 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul } } - protected double computeLog10PofDgivenAFi(DiploidGenotype refGenotype, DiploidGenotype hetGenotype, DiploidGenotype homGenotype, double f) { + protected double computeLog10PofDgivenAFi(char ref, char alt, double f, HashMap contexts, StratifiedContext contextType) { + DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref); + DiploidGenotype hetGenotype = ref < alt ? DiploidGenotype.valueOf(String.valueOf(ref) + String.valueOf(alt)) : DiploidGenotype.valueOf(String.valueOf(alt) + String.valueOf(ref)); + DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt); + double PofDgivenAFi = 0.0; // for each sample diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index 8d0656018..cc255fe03 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -10,8 +10,6 @@ import java.util.*; public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalculationModel { - protected JointEstimateGenotypeCalculationModel() {} - // because the null allele frequencies are constant for a given N, // we cache the results to avoid having to recompute everything private HashMap nullAlleleFrequencyCache = new HashMap(); @@ -20,8 +18,6 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc // we cache the results to avoid having to recompute everything private HashMap hardyWeinbergValueCache = new HashMap(); - protected enum GenotypeType { REF, HET, HOM } - // the allele frequency priors protected double[] log10AlleleFrequencyPriors; @@ -30,13 +26,17 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc protected double[][] log10PofDgivenAFi = new double[BaseUtils.BASES.length][]; protected double[] PofFs = new double[BaseUtils.BASES.length]; - - // these need to be implemented - protected abstract HashMap createContexts(AlignmentContext context); - protected abstract void initializeLikelihoods(char ref, HashMap contexts, StratifiedContext contextType); - protected abstract double computeLog10PofDgivenAFi(DiploidGenotype refGenotype, DiploidGenotype hetGenotype, DiploidGenotype homGenotype, double f); - protected abstract List makeGenotypeCalls(char ref, HashMap contexts, GenomeLoc loc); + + // The abstract methods which need to be implemented: + // 1. How many samples are being evaluated? protected abstract int getNSamples(HashMap contexts); + // 2. Create stratified contexts from the original alignment context + protected abstract HashMap createContexts(AlignmentContext context); + // 3. The likelihoods calculation underlying this whole model + protected abstract double computeLog10PofDgivenAFi(char ref, char alt, double f, HashMap contexts, StratifiedContext contextType); + + + protected JointEstimateGenotypeCalculationModel() {} public Pair, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { @@ -50,8 +50,8 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc initializeAlleleFrequencies(frequencyEstimationPoints); - initializeLikelihoods(ref, contexts, StratifiedContext.OVERALL); - calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints); + initialize(ref, contexts, StratifiedContext.OVERALL); + calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedContext.OVERALL); calculatePofFs(ref, frequencyEstimationPoints); // print out stats if we have a writer @@ -61,7 +61,12 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc return createCalls(tracker, ref, contexts, context.getLocation(), frequencyEstimationPoints); } - private void initializeAlleleFrequencies(int frequencyEstimationPoints) { + protected void initialize(char ref, HashMap contexts, StratifiedContext contextType) { + // by default, no initialization is done + return; + } + + protected void initializeAlleleFrequencies(int frequencyEstimationPoints) { // set up the allele frequency priors log10AlleleFrequencyPriors = getNullAlleleFrequencyPriors(frequencyEstimationPoints); } @@ -98,7 +103,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc return AFs; } - protected void calculateAlleleFrequencyPosteriors(char ref, int frequencyEstimationPoints) { + protected void calculateAlleleFrequencyPosteriors(char ref, int frequencyEstimationPoints, HashMap contexts, StratifiedContext contextType) { // initialization for ( char altAllele : BaseUtils.BASES ) { @@ -106,8 +111,6 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc alleleFrequencyPosteriors[baseIndex] = new double[frequencyEstimationPoints]; log10PofDgivenAFi[baseIndex] = new double[frequencyEstimationPoints]; } - DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref); - String refStr = String.valueOf(ref); // for each alternate allele for ( char altAllele : BaseUtils.BASES ) { @@ -116,13 +119,10 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele); - DiploidGenotype hetGenotype = ref < altAllele ? DiploidGenotype.valueOf(refStr + String.valueOf(altAllele)) : DiploidGenotype.valueOf(String.valueOf(altAllele) + refStr); - DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(altAllele); - // for each minor allele frequency for (int i = 0; i < frequencyEstimationPoints; i++) { double f = (double)i / (double)(frequencyEstimationPoints-1); - log10PofDgivenAFi[baseIndex][i] += computeLog10PofDgivenAFi(refGenotype, hetGenotype, homGenotype, f); + log10PofDgivenAFi[baseIndex][i] += computeLog10PofDgivenAFi(ref, altAllele, f, contexts, contextType); } } } @@ -210,6 +210,11 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc verboseWriter.println(); } + protected List makeGenotypeCalls(char ref, HashMap contexts, GenomeLoc loc) { + // by default, we return no genotypes + return new ArrayList(); + } + protected Pair, GenotypeLocusData> createCalls(RefMetaDataTracker tracker, char ref, HashMap contexts, GenomeLoc loc, int frequencyEstimationPoints) { // first, find the alt allele with maximum confidence int indexOfMax = 0; @@ -260,15 +265,15 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc double lod = overallLog10PofF - overallLog10PofNull; // the forward lod - initializeLikelihoods(ref, contexts, StratifiedContext.FORWARD); - calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints); + initialize(ref, contexts, StratifiedContext.FORWARD); + calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedContext.FORWARD); calculatePofFs(ref, frequencyEstimationPoints); double forwardLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]); double forwardLog10PofF = Math.log10(PofFs[indexOfMax]); // the reverse lod - initializeLikelihoods(ref, contexts, StratifiedContext.REVERSE); - calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints); + initialize(ref, contexts, StratifiedContext.REVERSE); + calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedContext.REVERSE); calculatePofFs(ref, frequencyEstimationPoints); double reverseLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]); double reverseLog10PofF = Math.log10(PofFs[indexOfMax]); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java index c4dc94a51..d7eecfa6e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java @@ -1,8 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.utils.ReadBackedPileup; import java.util.*; @@ -10,8 +9,9 @@ import net.sf.samtools.SAMRecord; public class PooledCalculationModel extends JointEstimateGenotypeCalculationModel { - protected PooledCalculationModel() { - } + private static final String POOL_SAMPLE_NAME = "POOL"; + + protected PooledCalculationModel() {} protected int getNSamples(HashMap contexts) { return POOL_SIZE; @@ -43,23 +43,17 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode } HashMap contexts = new HashMap(); - contexts.put("POOL", pooledContext); + contexts.put(POOL_SAMPLE_NAME, pooledContext); return contexts; } - - protected void initializeLikelihoods(char ref, HashMap contexts, StratifiedContext contextType) { - return; - } - protected double computeLog10PofDgivenAFi(DiploidGenotype refGenotype, DiploidGenotype hetGenotype, DiploidGenotype homGenotype, double f) { - System.out.printf("%s %.2f%n", hetGenotype, f); + protected double computeLog10PofDgivenAFi(char ref, char alt, double f, HashMap contexts, StratifiedContext contextType) { + AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME); + ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType)); + + System.out.printf("%s %.2f %d%n", alt, f, pileup.getBases().length()); // TODO -- IMPLEMENT ME return -100.0; } - - protected List makeGenotypeCalls(char ref, HashMap contexts, GenomeLoc loc) { - // no genotypes in pooled mode - return new ArrayList(); - } } \ No newline at end of file