diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/StratifiedAlignmentContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/StratifiedAlignmentContext.java index 4b0ac4b19..e8e405afc 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/StratifiedAlignmentContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/StratifiedAlignmentContext.java @@ -31,6 +31,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import java.util.ArrayList; import java.util.HashMap; @@ -108,48 +109,84 @@ public class StratifiedAlignmentContext { public static Map splitContextBySample(ReadBackedPileup pileup, String assumedSingleSample, String collapseToThisSample) { HashMap contexts = new HashMap(); + GenomeLoc loc = pileup.getLocation(); - for (PileupElement p : pileup ) { - - // get the read - SAMRecord read = p.getRead(); - - // find the sample - String sample; - if ( collapseToThisSample != null ) { - sample = collapseToThisSample; - } else { - SAMReadGroupRecord readGroup = read.getReadGroup(); - if ( readGroup == null ) { - if ( assumedSingleSample == null ) - throw new StingException("Missing read group for read " + read.getReadName()); - sample = assumedSingleSample; - } else { - sample = readGroup.getSample(); - } - } - - // create a new context object if this is the first time we're seeing a read for this sample - StratifiedAlignmentContext myContext = contexts.get(sample); - if ( myContext == null ) { - myContext = new StratifiedAlignmentContext(pileup.getLocation()); - contexts.put(sample, myContext); - } - - // add the read to this sample's context - // note that bad bases are added to the context (for DoC calculations later) - myContext.add(read, p.getOffset()); - } + for (PileupElement p : pileup ) + addToContext(contexts, p, loc, assumedSingleSample, collapseToThisSample); return contexts; } + /** + * Splits the given AlignmentContext into a StratifiedAlignmentContext per sample. + * + * @param pileup the original pileup + * + * @return a Map of sample name to StratifiedAlignmentContext + * + **/ + public static Map splitContextBySample(ReadBackedExtendedEventPileup pileup) { + return splitContextBySample(pileup, null, null); + } + + /** + * Splits the given AlignmentContext into a StratifiedAlignmentContext per sample. + * + * @param pileup the original pileup + * @param assumedSingleSample if not null, any read without a readgroup will be given this sample name + * @param collapseToThisSample if not null, all reads will be assigned this read group regardless of their actual read group + * + * @return a Map of sample name to StratifiedAlignmentContext + * + **/ + public static Map splitContextBySample(ReadBackedExtendedEventPileup pileup, String assumedSingleSample, String collapseToThisSample) { + + HashMap contexts = new HashMap(); + GenomeLoc loc = pileup.getLocation(); + + for (PileupElement p : pileup ) + addToContext(contexts, p, loc, assumedSingleSample, collapseToThisSample); + + return contexts; + } + + private static void addToContext(HashMap contexts, PileupElement p, GenomeLoc loc, String assumedSingleSample, String collapseToThisSample) { + // get the read + SAMRecord read = p.getRead(); + + // find the sample + String sample; + if ( collapseToThisSample != null ) { + sample = collapseToThisSample; + } else { + SAMReadGroupRecord readGroup = read.getReadGroup(); + if ( readGroup == null ) { + if ( assumedSingleSample == null ) + throw new StingException("Missing read group for read " + read.getReadName()); + sample = assumedSingleSample; + } else { + sample = readGroup.getSample(); + } + } + + // create a new context object if this is the first time we're seeing a read for this sample + StratifiedAlignmentContext myContext = contexts.get(sample); + if ( myContext == null ) { + myContext = new StratifiedAlignmentContext(loc); + contexts.put(sample, myContext); + } + + // add the read to this sample's context + // note that bad bases are added to the context (for DoC calculations later) + myContext.add(read, p.getOffset()); + } + /** * Splits the given AlignmentContext into a StratifiedAlignmentContext per read group. * * @param pileup the original pileup * @return a Map of sample name to StratifiedAlignmentContext - * @todo - support for collapsing or assuming read groups if they are missing + * TODO - support for collapsing or assuming read groups if they are missing * **/ public static Map splitContextByReadGroup(ReadBackedPileup pileup) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java index e13bab43c..fe79c740f 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java @@ -83,7 +83,7 @@ public abstract class GenotypeCalculationModel implements Cloneable { * @param stratifiedContexts stratified alignment contexts * @param priors priors to use for GL * - * @return calls + * @return call */ public abstract VariantCallContext callLocus(RefMetaDataTracker tracker, char ref, @@ -91,6 +91,19 @@ public abstract class GenotypeCalculationModel implements Cloneable { Map stratifiedContexts, DiploidGenotypePriors priors); + /** + * @param tracker rod data + * @param ref reference base + * @param loc GenomeLoc + * @param stratifiedContexts stratified alignment contexts + * + * @return call + */ + public abstract VariantCallContext callExtendedLocus(RefMetaDataTracker tracker, + char ref, + GenomeLoc loc, + Map stratifiedContexts); + /** * @param tracker rod data * diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index bafdba91d..f0a8596c8 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -39,6 +39,10 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc protected JointEstimateGenotypeCalculationModel() {} + public VariantCallContext callExtendedLocus(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map stratifiedContexts) { + return null; + } + public VariantCallContext callLocus(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map contexts, DiploidGenotypePriors priors) { int numSamples = getNSamples(contexts); int frequencyEstimationPoints = (2 * numSamples) + 1; // (add 1 for allele frequency of zero) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java index bd76cd10f..563a4cce8 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java @@ -11,13 +11,25 @@ import java.util.*; public class SimpleIndelCalculationModel extends GenotypeCalculationModel { + // the previous normal event context + private Map cachedContext; + protected SimpleIndelCalculationModel() {} public VariantCallContext callLocus(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map contexts, DiploidGenotypePriors priors) { - + cachedContext = contexts; return null; + } + + public VariantCallContext callExtendedLocus(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map contexts) { + + System.out.println("Reached " + loc + " through an extended event"); + + // TODO -- implement me //VariantContext vc = new MutableVariantContext("UG_indel_call", loc, alleles, genotypes, phredScaledConfidence/10.0, null, attributes); //return new VariantCallContext(vc, phredScaledConfidence >= CONFIDENCE_THRESHOLD); + + return null; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index e55892fee..0adde64fe 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -81,9 +81,12 @@ public class UnifiedGenotyper extends LocusWalker UAC.MAX_DELETION_FRACTION ) - return null; + // filter the context based on bad mates and mismatch rate + pileup = filterPileup(pileup, refContext); - // stratify the AlignmentContext and cut by sample - // Note that for testing purposes, we may want to throw multi-samples at pooled mode - Map stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null)); - if ( stratifiedContexts == null ) - return null; + // don't call when there is no coverage + if ( pileup.size() == 0 ) + return null; - DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_REFERENCE_ERROR); - VariantCallContext call = gcm.get().callLocus(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors); + // stratify the AlignmentContext and cut by sample + Map stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE, null); + if ( stratifiedContexts == null ) + return null; - // annotate the call, if possible - if ( call != null && call.vc != null ) { - // first off, we want to use the *unfiltered* context for the annotations - stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup()); + call = gcm.get().callExtendedLocus(tracker, ref, rawContext.getLocation(), stratifiedContexts); - Map annotations; - if ( UAC.ALL_ANNOTATIONS ) - annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.vc, annotateDbsnp, annotateHapmap2, annotateHapmap3); - else - annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.vc, annotateDbsnp, annotateHapmap2, annotateHapmap3); - ((MutableVariantContext)call.vc).putAttributes(annotations); + } else { + + ReadBackedPileup rawPileup = rawContext.getBasePileup(); + + // filter the context based on min base and mapping qualities + ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE); + + // filter the context based on bad mates and mismatch rate + pileup = filterPileup(pileup, refContext); + + // don't call when there is no coverage + if ( pileup.size() == 0 ) + return null; + + // are there too many deletions in the pileup? + if ( UAC.genotypeModel != GenotypeCalculationModel.Model.INDELS && + isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) && + (double)pileup.getNumberOfDeletions() / (double)pileup.size() > UAC.MAX_DELETION_FRACTION ) + return null; + + // stratify the AlignmentContext and cut by sample + // Note that for testing purposes, we may want to throw multi-samples at pooled mode + Map stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null)); + if ( stratifiedContexts == null ) + return null; + + DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_REFERENCE_ERROR); + call = gcm.get().callLocus(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors); + + // annotate the call, if possible + if ( call != null && call.vc != null ) { + // first off, we want to use the *unfiltered* context for the annotations + stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup()); + + Map annotations; + if ( UAC.ALL_ANNOTATIONS ) + annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.vc, annotateDbsnp, annotateHapmap2, annotateHapmap3); + else + annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.vc, annotateDbsnp, annotateHapmap2, annotateHapmap3); + ((MutableVariantContext)call.vc).putAttributes(annotations); + } } return call; @@ -221,6 +248,18 @@ public class UnifiedGenotyperEngine { return new ReadBackedPileup(pileup.getLocation(), filteredPileup); } + // filter based on maximum mismatches and bad mates + private ReadBackedExtendedEventPileup filterPileup(ReadBackedExtendedEventPileup pileup, ReferenceContext refContext) { + + ArrayList filteredPileup = new ArrayList(); + for ( ExtendedEventPileupElement p : pileup ) { + if ( (UAC.USE_BADLY_MATED_READS || !p.getRead().getReadPairedFlag() || p.getRead().getMateUnmappedFlag() || p.getRead().getMateReferenceIndex() == p.getRead().getReferenceIndex()) && + AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES ) + filteredPileup.add(p); + } + return new ReadBackedExtendedEventPileup(pileup.getLocation(), filteredPileup); + } + private static boolean isValidDeletionFraction(double d) { return ( d >= 0.0 && d <= 1.0 ); } diff --git a/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java b/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java index e5588631b..1e519d1e7 100644 --- a/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java +++ b/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java @@ -5,7 +5,6 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import net.sf.samtools.SAMRecord; import java.util.Arrays; -import java.util.Collections; /** * In the "standard" locus traversal mode, @@ -33,7 +32,7 @@ import java.util.Collections; * Time: 2:57:55 PM * To change this template use File | Settings | File Templates. */ -public class ExtendedEventPileupElement { +public class ExtendedEventPileupElement extends PileupElement { public enum Type { NOEVENT, DELETION, INSERTION }; @@ -53,8 +52,7 @@ public class ExtendedEventPileupElement { * @param eventBases inserted bases. null indicates that the event is a deletion; ignored if length<=0 (noevent) */ public ExtendedEventPileupElement( SAMRecord read, int offset, int length, byte[] eventBases ) { - this.read = read; - this.offset = offset; + super(read, offset); this.eventLength = length; if ( length <= 0 ) type = Type.NOEVENT; else { @@ -89,12 +87,6 @@ public class ExtendedEventPileupElement { return isDeletion() || isInsertion(); } - public SAMRecord getRead() { return read; } - - public int getOffset() { return offset; } - - public int getMappingQual() { return read.getMappingQuality(); } - public Type getType() { return type; } public GenomeLoc getLocation() { diff --git a/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index c196a9af8..1c61a4d00 100755 --- a/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -19,8 +19,8 @@ public class PileupElement { public static final byte DELETION_BASE = 'D'; public static final byte DELETION_QUAL = 0; - private SAMRecord read; - private int offset; + protected SAMRecord read; + protected int offset; public PileupElement( SAMRecord read, int offset ) { this.read = read;