From 237ab1d4895fb64a3357a6207da808f4f641290d Mon Sep 17 00:00:00 2001 From: ebanks Date: Fri, 3 Dec 2010 05:57:06 +0000 Subject: [PATCH] 1. As discussed in group meeting today, because we cap BQ by MQ, if MQ < minBQ then we filter the read. 2. Update to UGCalcLikelihoods for Chris: require a vcf bound to 'allele' to be provided so that we know exactly which alternate allele we should be calculating GLs for at each site. The user is warned when the VC is not biallelic or there are multiple records at a site. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4780 348d0f76-0448-11de-a6fe-93d51630548a --- ...elGenotypeLikelihoodsCalculationModel.java | 3 ++- .../GenotypeLikelihoodsCalculationModel.java | 4 +++- ...NPGenotypeLikelihoodsCalculationModel.java | 7 +++++-- .../walkers/genotyper/UGCalcLikelihoods.java | 20 +++++++++++++++++-- .../genotyper/UnifiedGenotyperEngine.java | 19 ++++++++++-------- 5 files changed, 39 insertions(+), 14 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java index 3690ae8dc..51d0bcc69 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java @@ -67,7 +67,8 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo Map contexts, StratifiedAlignmentContext.StratifiedContextType contextType, GenotypePriors priors, - Map GLs) { + Map GLs, + Allele alternateAlleleToUse) { // TODO: use this instead of reading the 'indels' ROD from the tracker below if ( tracker == null ) return null; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index facef70f5..f131cf75d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -67,6 +67,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { * @param contextType stratified context type * @param priors priors to use for GLs * @param GLs hash of sample->GL to fill in + * @param alternateAlleleToUse the alternate allele to use, null if not set * * @return genotype likelihoods per sample for AA, AB, BB */ @@ -75,7 +76,8 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { Map contexts, StratifiedAlignmentContext.StratifiedContextType contextType, GenotypePriors priors, - Map GLs); + Map GLs, + Allele alternateAlleleToUse); protected int getFilteredDepth(ReadBackedPileup pileup) { int count = 0; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 8f04ea6bc..3bdfff71d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -53,7 +53,8 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC Map contexts, StratifiedAlignmentContext.StratifiedContextType contextType, GenotypePriors priors, - Map GLs) { + Map GLs, + Allele alternateAlleleToUse) { if ( !(priors instanceof DiploidSNPGenotypePriors) ) throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model"); @@ -62,8 +63,10 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC Allele refAllele = Allele.create(refBase, true); // find the alternate allele with the largest sum of quality scores - if ( contextType == StratifiedAlignmentContext.StratifiedContextType.COMPLETE ) + if ( alternateAlleleToUse == null ) initializeBestAlternateAllele(refBase, contexts); + else + bestAlternateAllele = alternateAlleleToUse.getBases()[0]; // if there are no non-ref bases... if ( bestAlternateAllele == null ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java index 2e36c0c17..e2460962c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Output; @@ -41,8 +42,10 @@ import java.util.*; /** * Uses the UG engine to determine per-sample genotype likelihoods and emits them as a VCF (using PLs). * Absolutely not supported or recommended for public use. - * Run this as you would the UnifiedGenotyper. + * Run this as you would the UnifiedGenotyper, except that you must additionally pass in a VCF bound to + * the name 'allele' so we know which alternate allele to use at each site. */ +@Requires(value={},referenceMetaData=@RMD(name="allele", type= VariantContext.class)) @Reference(window=@Window(start=-200,stop=200)) @By(DataSource.READS) @Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250) @@ -85,7 +88,20 @@ public class UGCalcLikelihoods extends LocusWalker } public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) { - return new VariantCallContext(UG_engine.calculateLikelihoods(tracker, refContext, rawContext), refContext.getBase(), true); + Collection VCs = tracker.getVariantContexts(refContext, "allele", null, rawContext.getLocation(), true, false); + if ( VCs.size() == 0 ) + return null; + if ( VCs.size() > 1 ) { + logger.warn("Multiple records seen in the 'allele' ROD at position " + rawContext.getLocation() + "; skipping..."); + return null; + } + VariantContext vc = VCs.iterator().next(); + if ( !vc.isBiallelic() ) { + logger.warn("The record in the 'allele' ROD at position " + rawContext.getLocation() + " is not biallelic; skipping..."); + return null; + } + + return new VariantCallContext(UG_engine.calculateLikelihoods(tracker, refContext, rawContext, vc.getAlternateAllele(0)), refContext.getBase(), true); } public Integer reduceInit() { return 0; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index c0d0c061c..52c101683 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -142,7 +142,7 @@ public class UnifiedGenotyperEngine { */ public VariantCallContext calculateLikelihoodsAndGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) { Map stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext); - VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE); + VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE, null); if ( vc == null ) return null; @@ -157,15 +157,16 @@ public class UnifiedGenotyperEngine { * @param tracker the meta data tracker * @param refContext the reference base * @param rawContext contextual information around the locus + * @param alternateAlleleToUse the alternate allele to use, null if not set * @return the VariantContext object */ - public VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) { + public VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Allele alternateAlleleToUse) { Map stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext); - VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE); + VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE, alternateAlleleToUse); return GLsToPLs(vc); } - private VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, Map stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType type) { + private VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, Map stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType type, Allele alternateAlleleToUse) { if ( stratifiedContexts == null ) return null; @@ -175,7 +176,7 @@ public class UnifiedGenotyperEngine { } Map GLs = new HashMap(); - Allele refAllele = glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, type, genotypePriors, GLs); + Allele refAllele = glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, type, genotypePriors, GLs, alternateAlleleToUse); if (refAllele != null) return createVariantContextFromLikelihoods(refContext, refAllele, GLs); @@ -332,7 +333,7 @@ public class UnifiedGenotyperEngine { if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF); // the forward lod - VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD); + VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD, vc.getAlternateAllele(0)); clearAFarray(log10AlleleFrequencyPosteriors.get()); afcm.get().getLog10PNonRef(tracker, refContext, vcForward.getGenotypes(), log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get()); //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); @@ -341,7 +342,7 @@ public class UnifiedGenotyperEngine { if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF); // the reverse lod - VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE); + VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE, vc.getAlternateAllele(0)); clearAFarray(log10AlleleFrequencyPosteriors.get()); afcm.get().getLog10PNonRef(tracker, refContext, vcReverse.getGenotypes(), log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get()); //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); @@ -574,8 +575,10 @@ public class UnifiedGenotyperEngine { // all bits are set to false by default BitSet bitset = new BitSet(record.getReadLength()); - // if the mapping quality is too low or the mate is bad, we can just zero out the whole read and continue + // if the mapping quality is too low or the mate is bad, we can just zero out the whole read and continue. + // note that, because we cap the base quality by the mapping quality, if MQ < minBQ then we can filter this read. if ( record.getMappingQuality() < UAC.MIN_MAPPING_QUALTY_SCORE || + record.getMappingQuality() < UAC.MIN_BASE_QUALTY_SCORE || (!UAC.USE_BADLY_MATED_READS && BadMateFilter.hasBadMate(record)) ) { return bitset; }