diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/StratifiedAlignmentContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/StratifiedAlignmentContext.java new file mode 100755 index 000000000..5698b90fc --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/contexts/StratifiedAlignmentContext.java @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.contexts; + +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.PileupElement; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +/** + * Useful class for storing different AlignmentContexts + * User: ebanks + */ +public class StratifiedAlignmentContext { + + public enum StratifiedContextType { OVERALL, FORWARD, REVERSE } + + private AlignmentContext overall = null; + private AlignmentContext forward = null; + private AlignmentContext reverse = null; + private GenomeLoc loc; + + private ArrayList allReads = new ArrayList(); + private ArrayList forwardReads = new ArrayList(); + private ArrayList reverseReads = new ArrayList(); + + private ArrayList allOffsets = new ArrayList(); + private ArrayList forwardOffsets = new ArrayList(); + private ArrayList reverseOffsets = new ArrayList(); + + + public StratifiedAlignmentContext(GenomeLoc loc) { + this.loc = loc; + } + + public AlignmentContext getContext(StratifiedContextType context) { + switch ( context ) { + case OVERALL: return getOverallContext(); + case FORWARD: return getForwardContext(); + case REVERSE: return getReverseContext(); + } + return null; + } + + private AlignmentContext getOverallContext() { + if ( overall == null ) + overall = new AlignmentContext(loc, new ReadBackedPileup(loc, allReads, allOffsets)); + return overall; + } + + private AlignmentContext getForwardContext() { + if ( forward == null ) + forward = new AlignmentContext(loc, new ReadBackedPileup(loc, forwardReads, forwardOffsets)); + return forward; + } + + private AlignmentContext getReverseContext() { + if ( reverse == null ) + reverse = new AlignmentContext(loc, new ReadBackedPileup(loc, reverseReads, reverseOffsets)); + return reverse; + } + + public void add(SAMRecord read, int offset) { + if ( read.getReadNegativeStrandFlag() ) { + reverseReads.add(read); + reverseOffsets.add(offset); + } else { + forwardReads.add(read); + forwardOffsets.add(offset); + } + allReads.add(read); + allOffsets.add(offset); + } + + /** + * Splits the given AlignmentContext into a StratifiedAlignmentContext per sample. + * + * @param context the original AlignmentContext + * @param assumedSingleSample if not null, any read without a readgroup will be given this sample name + * @param collapseToThisSample if not null, all reads will be assigned this read group regardless of their actual read group + * + * @return a Map of sample name to StratifiedAlignmentContext + * + **/ + public static Map splitContextBySample(AlignmentContext context, String assumedSingleSample, String collapseToThisSample) { + + HashMap contexts = new HashMap(); + + ReadBackedPileup pileup = context.getPileup(); + for (PileupElement p : pileup ) { + + // get the read + SAMRecord read = p.getRead(); + + // find the sample + String sample; + if ( collapseToThisSample != null ) { + sample = collapseToThisSample; + } else { + SAMReadGroupRecord readGroup = read.getReadGroup(); + if ( readGroup == null ) { + if ( assumedSingleSample == null ) + throw new StingException("Missing read group for read " + read.getReadName()); + sample = assumedSingleSample; + } else { + sample = readGroup.getSample(); + } + } + + // create a new context object if this is the first time we're seeing a read for this sample + StratifiedAlignmentContext myContext = contexts.get(sample); + if ( myContext == null ) { + myContext = new StratifiedAlignmentContext(context.getLocation()); + contexts.put(sample, myContext); + } + + // add the read to this sample's context + // note that bad bases are added to the context (for DoC calculations later) + myContext.add(read, p.getOffset()); + } + + return contexts; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java index 8985380ef..ed8ce27a1 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.*; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import java.util.*; @@ -16,7 +17,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul private enum GenotypeType { REF, HET, HOM } - protected void initialize(char ref, HashMap contexts, StratifiedContext contextType) { + protected void initialize(char ref, Map contexts, StratifiedAlignmentContext.StratifiedContextType contextType) { // initialize the GenotypeLikelihoods GLs.clear(); AFMatrixMap.clear(); @@ -32,7 +33,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul int index = 0; for ( String sample : contexts.keySet() ) { - AlignmentContextBySample context = contexts.get(sample); + StratifiedAlignmentContext context = contexts.get(sample); ReadBackedPileup pileup = context.getContext(contextType).getPileup(); // create the GenotypeLikelihoods object @@ -55,7 +56,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul } } - protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int numFrequencies, HashMap contexts, StratifiedContext contextType) { + protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int numFrequencies, Map contexts, StratifiedAlignmentContext.StratifiedContextType contextType) { AlleleFrequencyMatrix matrix = AFMatrixMap.get(alt); int baseIndex = BaseUtils.simpleBaseToBaseIndex(alt); @@ -73,7 +74,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul } } - protected List makeGenotypeCalls(char ref, char alt, HashMap contexts, GenomeLoc loc) { + protected List makeGenotypeCalls(char ref, char alt, Map contexts, GenomeLoc loc) { ArrayList calls = new ArrayList(); for ( String sample : GLs.keySet() ) { @@ -82,7 +83,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, loc); if ( call instanceof ReadBacked ) { - ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedContext.OVERALL).getPileup(); + ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.OVERALL).getPileup(); ((ReadBacked)call).setPileup(pileup); } if ( call instanceof SampleBacked ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java index 37c3138d6..4b987a099 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.utils.*; @@ -19,15 +19,10 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode protected EMGenotypeCalculationModel() {} - public Pair> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { - - // keep track of the context for each sample, overall and separated by strand - HashMap contexts = splitContextBySample(context); - if ( contexts == null ) - return null; + public Pair> calculateGenotype(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map contexts, DiploidGenotypePriors priors) { // run the EM calculation - EMOutput overall = runEM(ref, contexts, priors, StratifiedContext.OVERALL); + EMOutput overall = runEM(ref, contexts, priors, StratifiedAlignmentContext.StratifiedContextType.OVERALL); double PofD = Math.pow(10, overall.getPofD()); double PofNull = Math.pow(10, overall.getPofNull()); @@ -50,7 +45,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode // generate the calls List calls = genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts); - VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation(), bestIsRef ? Variation.VARIANT_TYPE.REFERENCE : Variation.VARIANT_TYPE.SNP); + VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc, bestIsRef ? Variation.VARIANT_TYPE.REFERENCE : Variation.VARIANT_TYPE.SNP); if ( locusdata != null ) { if ( locusdata instanceof ConfidenceBacked ) { ((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence); @@ -67,8 +62,8 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode double lod = overall.getPofD() - overall.getPofNull(); logger.debug(String.format("LOD=%f", lod)); - EMOutput forward = runEM(ref, contexts, priors, StratifiedContext.FORWARD); - EMOutput reverse = runEM(ref, contexts, priors, StratifiedContext.REVERSE); + EMOutput forward = runEM(ref, contexts, priors, StratifiedAlignmentContext.StratifiedContextType.FORWARD); + EMOutput reverse = runEM(ref, contexts, priors, StratifiedAlignmentContext.StratifiedContextType.REVERSE); double forwardLod = (forward.getPofD() + reverse.getPofNull()) - overall.getPofNull(); double reverseLod = (reverse.getPofD() + forward.getPofNull()) - overall.getPofNull(); logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod); @@ -89,7 +84,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode return new Pair>(locusdata, calls); } - protected List genotypeCallsFromGenotypeLikelihoods(EMOutput results, char ref, HashMap contexts) { + protected List genotypeCallsFromGenotypeLikelihoods(EMOutput results, char ref, Map contexts) { HashMap GLs = results.getGenotypeLikelihoods(); ArrayList calls = new ArrayList(); @@ -98,11 +93,11 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode for ( String sample : GLs.keySet() ) { // create the call - AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL); + AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.OVERALL); GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation()); if ( call instanceof ReadBacked ) { - ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedContext.OVERALL).getPileup(); + ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.OVERALL).getPileup(); ((ReadBacked)call).setPileup(pileup); } if ( call instanceof SampleBacked ) { @@ -129,7 +124,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode return calls; } - public EMOutput runEM(char ref, HashMap contexts, DiploidGenotypePriors priors, StratifiedContext contextType) { + public EMOutput runEM(char ref, Map contexts, DiploidGenotypePriors priors, StratifiedAlignmentContext.StratifiedContextType contextType) { // initialize the allele frequencies initializeAlleleFrequencies(contexts.size(), ref); @@ -157,7 +152,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode } protected abstract void initializeAlleleFrequencies(int numSamplesInContext, char ref); - protected abstract void initializeGenotypeLikelihoods(char ref, HashMap contexts, DiploidGenotypePriors priors, StratifiedContext contextType); + protected abstract void initializeGenotypeLikelihoods(char ref, Map contexts, DiploidGenotypePriors priors, StratifiedAlignmentContext.StratifiedContextType contextType); protected abstract void calculateAlleleFrequencyPosteriors(); protected abstract void applyAlleleFrequencyToGenotypeLikelihoods(); protected abstract boolean isStable(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java index 8baa18c0f..9d594b755 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java @@ -1,14 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.rodDbSNP; +import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.genotype.*; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.Pair; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.pileup.PileupElement; import org.apache.log4j.Logger; import java.io.*; @@ -41,8 +37,6 @@ public abstract class GenotypeCalculationModel implements Cloneable { protected double CONFIDENCE_THRESHOLD; protected double MINIMUM_ALLELE_FREQUENCY; protected boolean REPORT_SLOD; - protected double maxDeletionFractionInPileup; - protected String assumedSingleSample; protected PrintWriter verboseWriter; /** @@ -72,11 +66,6 @@ public abstract class GenotypeCalculationModel implements Cloneable { POOL_SIZE = UAC.POOLSIZE; CONFIDENCE_THRESHOLD = UAC.CONFIDENCE_THRESHOLD; MINIMUM_ALLELE_FREQUENCY = UAC.MINIMUM_ALLELE_FREQUENCY; - maxDeletionFractionInPileup = UAC.MAX_DELETION_FRACTION; - // disable if that's what is requested - if ( maxDeletionFractionInPileup < 0.0 || maxDeletionFractionInPileup > 1.0 ) - maxDeletionFractionInPileup = -1.0; - assumedSingleSample = UAC.ASSUME_SINGLE_SAMPLE; if ( UAC.VERBOSE != null ) { try { verboseWriter = new PrintWriter(UAC.VERBOSE); @@ -97,16 +86,18 @@ public abstract class GenotypeCalculationModel implements Cloneable { /** * Must be overridden by concrete subclasses - * @param tracker rod data - * @param ref reference base - * @param context alignment context - * @param priors priors to use for GL + * @param tracker rod data + * @param ref reference base + * @param loc GenomeLoc + * @param stratifiedContexts stratified alignment contexts + * @param priors priors to use for GL * * @return calls */ public abstract Pair> calculateGenotype(RefMetaDataTracker tracker, char ref, - AlignmentContext context, + GenomeLoc loc, + Map stratifiedContexts, DiploidGenotypePriors priors); /** @@ -128,119 +119,4 @@ public abstract class GenotypeCalculationModel implements Cloneable { private static boolean isHapmapSite(RefMetaDataTracker tracker) { return tracker.getTrackData("hapmap", null) != null; } - - /** - * Create the mapping from sample to alignment contexts. - * @param context original alignment context - * - * @return stratified contexts, or null if there are too many deletions at this position - */ - protected HashMap splitContextBySample(AlignmentContext context) { - - HashMap contexts = new HashMap(); - - ReadBackedPileup pileup = context.getPileup(); - for (PileupElement p : pileup ) { - - // get the read and offset - SAMRecord read = p.getRead(); - - // find the sample - String sample; - SAMReadGroupRecord readGroup = read.getReadGroup(); - if ( readGroup == null ) { - if ( assumedSingleSample == null ) - throw new StingException("Missing read group for read " + read.getReadName()); - sample = assumedSingleSample; - } else { - sample = readGroup.getSample(); - } - - // create a new context object if this is the first time we're seeing a read for this sample - AlignmentContextBySample myContext = contexts.get(sample); - if ( myContext == null ) { - myContext = new AlignmentContextBySample(context.getLocation()); - contexts.put(sample, myContext); - } - - // add the read to this sample's context - // note that bad bases are added to the context (for DoC calculations later) - myContext.add(read, p.getOffset()); - } - - - // are there too many deletions in the pileup? - if ( maxDeletionFractionInPileup != -1.0 && - (double)pileup.getNumberOfDeletions() / (double)pileup.size() > maxDeletionFractionInPileup ) - return null; - - return contexts; - } - - - /** - * A class to keep track of the alignment context observed for a given sample. - * we currently store the overall context and strand-stratified sets, - * but any arbitrary stratification could be added. - */ - protected enum StratifiedContext { OVERALL, FORWARD, REVERSE } - protected class AlignmentContextBySample { - - private AlignmentContext overall = null; - private AlignmentContext forward = null; - private AlignmentContext reverse = null; - private GenomeLoc loc; - - private ArrayList allReads = new ArrayList(); - private ArrayList forwardReads = new ArrayList(); - private ArrayList reverseReads = new ArrayList(); - - private ArrayList allOffsets = new ArrayList(); - private ArrayList forwardOffsets = new ArrayList(); - private ArrayList reverseOffsets = new ArrayList(); - - - AlignmentContextBySample(GenomeLoc loc) { - this.loc = loc; - } - - public AlignmentContext getContext(StratifiedContext context) { - switch ( context ) { - case OVERALL: return getOverallContext(); - case FORWARD: return getForwardContext(); - case REVERSE: return getReverseContext(); - } - return null; - } - - private AlignmentContext getOverallContext() { - if ( overall == null ) - overall = new AlignmentContext(loc, new ReadBackedPileup(loc, allReads, allOffsets)); - return overall; - } - - private AlignmentContext getForwardContext() { - if ( forward == null ) - forward = new AlignmentContext(loc, new ReadBackedPileup(loc, forwardReads, forwardOffsets)); - return forward; - } - - private AlignmentContext getReverseContext() { - if ( reverse == null ) - reverse = new AlignmentContext(loc, new ReadBackedPileup(loc, reverseReads, reverseOffsets)); - return reverse; - } - - public void add(SAMRecord read, int offset) { - if ( read.getReadNegativeStrandFlag() ) { - reverseReads.add(read); - reverseOffsets.add(offset); - } else { - forwardReads.add(read); - forwardOffsets.add(offset); - } - allReads.add(read); - allOffsets.add(offset); - } - } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index d5a1e26ad..edd87adba 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -6,7 +6,7 @@ import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.Variation.VARIANT_TYPE; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.rodDbSNP; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.*; import java.util.*; import java.io.PrintWriter; @@ -35,18 +35,13 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc protected JointEstimateGenotypeCalculationModel() {} - public Pair> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { - - // keep track of the context for each sample, overall and separated by strand - HashMap contexts = createContexts(context); - if ( contexts == null ) - return null; + public Pair> calculateGenotype(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map contexts, DiploidGenotypePriors priors) { int numSamples = getNSamples(contexts); int frequencyEstimationPoints = (2 * numSamples) + 1; // (add 1 for allele frequency of zero) // find the alternate allele with the largest sum of quality scores - initializeBestAlternateAllele(ref, context); + initializeBestAlternateAllele(ref, contexts); // if there are no non-ref bases... if ( bestAlternateAllele == null ) { @@ -60,38 +55,38 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc initializeAlleleFrequencies(frequencyEstimationPoints); - initialize(ref, contexts, StratifiedContext.OVERALL); - calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedContext.OVERALL); + initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.OVERALL); + calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.OVERALL); calculatePofFs(ref, frequencyEstimationPoints); // print out stats if we have a writer if ( verboseWriter != null ) - printAlleleFrequencyData(ref, context.getLocation(), frequencyEstimationPoints); + printAlleleFrequencyData(ref, loc, frequencyEstimationPoints); - return createCalls(tracker, ref, contexts, context.getLocation(), frequencyEstimationPoints); + return createCalls(tracker, ref, contexts, loc, frequencyEstimationPoints); } - protected HashMap createContexts(AlignmentContext context) { - return splitContextBySample(context); - } - - protected int getNSamples(HashMap contexts) { + protected int getNSamples(Map contexts) { return contexts.size(); } - protected void initializeBestAlternateAllele(char ref, AlignmentContext context) { + protected void initializeBestAlternateAllele(char ref, Map contexts) { int[] qualCounts = new int[4]; - // calculate the sum of quality scores for each base - ReadBackedPileup pileup = context.getPileup(); - for ( PileupElement p : pileup ) { - // ignore deletions - if ( p.isDeletion() ) - continue; + for ( String sample : contexts.keySet() ) { + AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.OVERALL); - int index = BaseUtils.simpleBaseToBaseIndex((char)p.getBase()); - if ( index >= 0 ) - qualCounts[index] += p.getQual(); + // calculate the sum of quality scores for each base + ReadBackedPileup pileup = context.getPileup(); + for ( PileupElement p : pileup ) { + // ignore deletions + if ( p.isDeletion() ) + continue; + + int index = BaseUtils.simpleBaseToBaseIndex((char)p.getBase()); + if ( index >= 0 ) + qualCounts[index] += p.getQual(); + } } // set the non-ref base with maximum quality score sum @@ -118,7 +113,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc verboseWriter.println(header); } - protected void initialize(char ref, HashMap contexts, StratifiedContext contextType) { + protected void initialize(char ref, Map contexts, StratifiedAlignmentContext.StratifiedContextType contextType) { // by default, no initialization is done return; } @@ -160,7 +155,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc return AFs; } - protected void calculateAlleleFrequencyPosteriors(char ref, int frequencyEstimationPoints, HashMap contexts, StratifiedContext contextType) { + protected void calculateAlleleFrequencyPosteriors(char ref, int frequencyEstimationPoints, Map contexts, StratifiedAlignmentContext.StratifiedContextType contextType) { // initialization for ( char altAllele : BaseUtils.BASES ) { @@ -185,7 +180,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc * @param contexts stratified alignment contexts * @param contextType which stratification to use */ - protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int numFrequencies, HashMap contexts, StratifiedContext contextType) { + protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int numFrequencies, Map contexts, StratifiedAlignmentContext.StratifiedContextType contextType) { int baseIndex = BaseUtils.simpleBaseToBaseIndex(alt); // for each minor allele frequency, calculate log10PofDgivenAFi @@ -204,7 +199,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc * * @return value of PofDgivenAF for allele frequency f */ - protected double calculateLog10PofDgivenAFforF(char ref, char alt, double f, HashMap contexts, StratifiedContext contextType) { + protected double calculateLog10PofDgivenAFforF(char ref, char alt, double f, Map contexts, StratifiedAlignmentContext.StratifiedContextType contextType) { return 0.0; } @@ -304,12 +299,12 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc verboseWriter.println(); } - protected List makeGenotypeCalls(char ref, char alt, HashMap contexts, GenomeLoc loc) { + protected List makeGenotypeCalls(char ref, char alt, Map contexts, GenomeLoc loc) { // by default, we return no genotypes return new ArrayList(); } - protected Pair> createCalls(RefMetaDataTracker tracker, char ref, HashMap contexts, GenomeLoc loc, int frequencyEstimationPoints) { + protected Pair> createCalls(RefMetaDataTracker tracker, char ref, Map contexts, GenomeLoc loc, int frequencyEstimationPoints) { // only need to look at the most likely alternate allele int indexOfMax = BaseUtils.simpleBaseToBaseIndex(bestAlternateAllele); @@ -363,8 +358,8 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc if ( !Double.isInfinite(lod) ) { // the forward lod - initialize(ref, contexts, StratifiedContext.FORWARD); - calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedContext.FORWARD); + initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD); + calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD); calculatePofFs(ref, frequencyEstimationPoints); double forwardLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]); double forwardLog10PofF = Math.log10(PofFs[indexOfMax]); @@ -372,8 +367,8 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc lod = -1.0 * log10PofDgivenAFi[indexOfMax][0]; // the reverse lod - initialize(ref, contexts, StratifiedContext.REVERSE); - calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedContext.REVERSE); + initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE); + calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE); calculatePofFs(ref, frequencyEstimationPoints); double reverseLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]); double reverseLog10PofF = Math.log10(PofFs[indexOfMax]); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java index a0ea79dca..7470fa65c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.utils.*; @@ -26,26 +26,21 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation // overload this method so we can special-case the single sample - public Pair> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { + public Pair> calculateGenotype(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map contexts, DiploidGenotypePriors priors) { // we don't actually want to run EM for single samples if ( samples.size() == 1 ) { - // split the context (so we can get forward/reverse contexts for free) - HashMap contexts = splitContextBySample(context); - if ( contexts == null ) - return null; - // get the context for the sample String sample = samples.iterator().next(); - AlignmentContextBySample sampleContext = contexts.get(sample); + StratifiedAlignmentContext sampleContext = contexts.get(sample); // if there were no good bases, the context wouldn't exist if ( sampleContext == null ) return null; // get the genotype likelihoods - Pair discoveryGL = getSingleSampleLikelihoods(sampleContext, priors, StratifiedContext.OVERALL); + Pair discoveryGL = getSingleSampleLikelihoods(sampleContext, priors, StratifiedAlignmentContext.StratifiedContextType.OVERALL); // find the index of the best genotype double[] posteriors = discoveryGL.second.getNormalizedPosteriors(); @@ -71,7 +66,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation return new Pair>(null, null); // we can now create the genotype call object - GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation()); + GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, loc); if ( call instanceof ReadBacked ) { ((ReadBacked)call).setPileup(discoveryGL.first); @@ -86,7 +81,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation ((PosteriorsBacked)call).setPosteriors(discoveryGL.second.getPosteriors()); } - VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation(), bestIsRef ? Variation.VARIANT_TYPE.REFERENCE : Variation.VARIANT_TYPE.SNP); + VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc, bestIsRef ? Variation.VARIANT_TYPE.REFERENCE : Variation.VARIANT_TYPE.SNP); if ( locusdata != null ) { if ( locusdata instanceof ConfidenceBacked ) { ((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence); @@ -104,10 +99,10 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation return new Pair>(locusdata, Arrays.asList((Genotype)call)); } - return super.calculateGenotype(tracker, ref, context, priors); + return super.calculateGenotype(tracker, ref, loc, contexts, priors); } - private Pair getSingleSampleLikelihoods(AlignmentContextBySample sampleContext, DiploidGenotypePriors priors, StratifiedContext contextType) { + private Pair getSingleSampleLikelihoods(StratifiedAlignmentContext sampleContext, DiploidGenotypePriors priors, StratifiedAlignmentContext.StratifiedContextType contextType) { // create the pileup AlignmentContext myContext = sampleContext.getContext(contextType); ReadBackedPileup pileup = myContext.getPileup(); @@ -127,13 +122,13 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation logger.debug("Initial allele frequency for " + BaseUtils.baseIndexToSimpleBase(i) + ": " + alleleFrequencies[i]); } - protected void initializeGenotypeLikelihoods(char ref, HashMap contexts, DiploidGenotypePriors priors, StratifiedContext contextType) { + protected void initializeGenotypeLikelihoods(char ref, Map contexts, DiploidGenotypePriors priors, StratifiedAlignmentContext.StratifiedContextType contextType) { GLs.clear(); DiploidGenotypePriors AFPriors = calculateAlleleFreqBasedPriors(alleleFrequencies); for ( String sample : contexts.keySet() ) { - AlignmentContextBySample context = contexts.get(sample); + StratifiedAlignmentContext context = contexts.get(sample); ReadBackedPileup pileup = context.getContext(contextType).getPileup(); // create the GenotypeLikelihoods object diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java index ba38be29c..c74c2f074 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.QualityUtils; @@ -10,7 +10,7 @@ import java.util.*; import net.sf.samtools.SAMRecord; public class PooledCalculationModel extends JointEstimateGenotypeCalculationModel { - private static final String POOL_SAMPLE_NAME = "POOL"; + protected static final String POOL_SAMPLE_NAME = "POOL"; private static FourBaseProbabilities fourBaseLikelihoods = null; private static boolean USE_CACHE = true; @@ -26,7 +26,7 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode * @param contexts * @param contextType */ - protected void initialize(char ref, HashMap contexts, StratifiedContext contextType) { + protected void initialize(char ref, Map contexts, StratifiedAlignmentContext.StratifiedContextType contextType) { super.initialize(ref, contexts, contextType); // todo -- move this code to a static initializer @@ -40,43 +40,13 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode makeCache(POOL_SIZE); } - protected int getNSamples(HashMap contexts) { + protected int getNSamples(Map contexts) { return POOL_SIZE; } - - protected HashMap createContexts(AlignmentContext context) { - // for testing purposes, we may want to throw multi-samples at pooled mode, - // so we can't use the standard splitContextBySample() method here - AlignmentContextBySample pooledContext = new AlignmentContextBySample(context.getLocation()); - int deletionsInPileup = 0; - List reads = context.getReads(); - List offsets = context.getOffsets(); + protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int nChromosomes, Map contexts, StratifiedAlignmentContext.StratifiedContextType contextType) { - for (int i = 0; i < reads.size(); i++) { - // check for deletions - int offset = offsets.get(i); - if ( offset == -1 ) - deletionsInPileup++; - - // add the read to this sample's context - // note that bad bases are added to the context (for DoC calculations later) - pooledContext.add(reads.get(i), offset); - } - - // are there too many deletions in the pileup? - if ( maxDeletionFractionInPileup != -1.0 && - (double)deletionsInPileup / (double)reads.size() > maxDeletionFractionInPileup ) - return null; - - HashMap contexts = new HashMap(); - contexts.put(POOL_SAMPLE_NAME, pooledContext); - return contexts; - } - - protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int nChromosomes, HashMap contexts, StratifiedContext contextType) { - - AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME); + StratifiedAlignmentContext context = contexts.get(POOL_SAMPLE_NAME); ReadBackedPileup pileup = context.getContext(contextType).getPileup(); int refIndex = BaseUtils.simpleBaseToBaseIndex(ref); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 168dffd95..7e9d5b11e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -25,22 +25,23 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import net.sf.samtools.SAMReadGroupRecord;import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator; import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.cmdLine.*; import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; +import net.sf.samtools.SAMReadGroupRecord; + import java.io.File; import java.util.*; -import java.lang.reflect.Field; @Reference(window=@Window(start=-20,stop=20)) @@ -178,8 +179,8 @@ public class UnifiedGenotyper extends LocusWalker commandLineArgs = CommandLineUtils.getApproximateCommandLineArguments(Collections.singleton(UAC)); - for(Map.Entry commandLineArg: commandLineArgs.entrySet()) - headerInfo.add(String.format("UG_%s=%s",commandLineArg.getKey(),commandLineArg.getValue())); + for ( Map.Entry commandLineArg : commandLineArgs.entrySet() ) + headerInfo.add(String.format("UG_%s=%s", commandLineArg.getKey(), commandLineArg.getValue())); return headerInfo; } @@ -204,8 +205,20 @@ public class UnifiedGenotyper extends LocusWalker 0 && MQ0freeContext.getPileup().size() > UAC.MAX_READS_IN_PILEUP) ) return null; + // are there too many deletions in the pileup? + ReadBackedPileup pileup = MQ0freeContext.getPileup(); + if ( isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) && + (double)pileup.getNumberOfDeletions() / (double)pileup.size() > UAC.MAX_DELETION_FRACTION ) + return null; + + // stratify the AlignmentContext and cut by sample + // Note that for testing purposes, we may want to throw multi-samples at pooled mode + Map stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(MQ0freeContext, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null)); + if ( stratifiedContexts == null ) + return null; + DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE); - Pair> call = gcm.calculateGenotype(tracker, ref, MQ0freeContext, priors); + Pair> call = gcm.calculateGenotype(tracker, ref, fullContext.getLocation(), stratifiedContexts, priors); // annotate the call, if possible if ( call != null && call.first != null && call.first instanceof ArbitraryFieldsBacked ) { @@ -220,7 +233,11 @@ public class UnifiedGenotyper extends LocusWalker= 0.0 && d <= 1.0 ); + } + + private static AlignmentContext filterAlignmentContext(AlignmentContext context) { return new AlignmentContext(context.getLocation(), context.getPileup().getPileupWithoutMappingQualityZeroReads(), 0);