1. As discussed in group meeting today, because we cap BQ by MQ, if MQ < minBQ then we filter the read.

2. Update to UGCalcLikelihoods for Chris: require a vcf bound to 'allele' to be provided so that we know exactly which alternate allele we should be calculating GLs for at each site.  The user is warned when the VC is not biallelic or there are multiple records at a site.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4780 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-12-03 05:57:06 +00:00
parent da6a07ad3b
commit 237ab1d489
5 changed files with 39 additions and 14 deletions

View File

@ -67,7 +67,8 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo
Map<String, StratifiedAlignmentContext> contexts,
StratifiedAlignmentContext.StratifiedContextType contextType,
GenotypePriors priors,
Map<String, BiallelicGenotypeLikelihoods> GLs) {
Map<String, BiallelicGenotypeLikelihoods> GLs,
Allele alternateAlleleToUse) { // TODO: use this instead of reading the 'indels' ROD from the tracker below
if ( tracker == null )
return null;

View File

@ -67,6 +67,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
* @param contextType stratified context type
* @param priors priors to use for GLs
* @param GLs hash of sample->GL to fill in
* @param alternateAlleleToUse the alternate allele to use, null if not set
*
* @return genotype likelihoods per sample for AA, AB, BB
*/
@ -75,7 +76,8 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
Map<String, StratifiedAlignmentContext> contexts,
StratifiedAlignmentContext.StratifiedContextType contextType,
GenotypePriors priors,
Map<String, BiallelicGenotypeLikelihoods> GLs);
Map<String, BiallelicGenotypeLikelihoods> GLs,
Allele alternateAlleleToUse);
protected int getFilteredDepth(ReadBackedPileup pileup) {
int count = 0;

View File

@ -53,7 +53,8 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
Map<String, StratifiedAlignmentContext> contexts,
StratifiedAlignmentContext.StratifiedContextType contextType,
GenotypePriors priors,
Map<String, BiallelicGenotypeLikelihoods> GLs) {
Map<String, BiallelicGenotypeLikelihoods> GLs,
Allele alternateAlleleToUse) {
if ( !(priors instanceof DiploidSNPGenotypePriors) )
throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model");
@ -62,8 +63,10 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
Allele refAllele = Allele.create(refBase, true);
// find the alternate allele with the largest sum of quality scores
if ( contextType == StratifiedAlignmentContext.StratifiedContextType.COMPLETE )
if ( alternateAlleleToUse == null )
initializeBestAlternateAllele(refBase, contexts);
else
bestAlternateAllele = alternateAlleleToUse.getBases()[0];
// if there are no non-ref bases...
if ( bestAlternateAllele == null ) {

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Output;
@ -41,8 +42,10 @@ import java.util.*;
/**
* Uses the UG engine to determine per-sample genotype likelihoods and emits them as a VCF (using PLs).
* Absolutely not supported or recommended for public use.
* Run this as you would the UnifiedGenotyper.
* Run this as you would the UnifiedGenotyper, except that you must additionally pass in a VCF bound to
* the name 'allele' so we know which alternate allele to use at each site.
*/
@Requires(value={},referenceMetaData=@RMD(name="allele", type= VariantContext.class))
@Reference(window=@Window(start=-200,stop=200))
@By(DataSource.READS)
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)
@ -85,7 +88,20 @@ public class UGCalcLikelihoods extends LocusWalker<VariantCallContext, Integer>
}
public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
return new VariantCallContext(UG_engine.calculateLikelihoods(tracker, refContext, rawContext), refContext.getBase(), true);
Collection<VariantContext> VCs = tracker.getVariantContexts(refContext, "allele", null, rawContext.getLocation(), true, false);
if ( VCs.size() == 0 )
return null;
if ( VCs.size() > 1 ) {
logger.warn("Multiple records seen in the 'allele' ROD at position " + rawContext.getLocation() + "; skipping...");
return null;
}
VariantContext vc = VCs.iterator().next();
if ( !vc.isBiallelic() ) {
logger.warn("The record in the 'allele' ROD at position " + rawContext.getLocation() + " is not biallelic; skipping...");
return null;
}
return new VariantCallContext(UG_engine.calculateLikelihoods(tracker, refContext, rawContext, vc.getAlternateAllele(0)), refContext.getBase(), true);
}
public Integer reduceInit() { return 0; }

View File

@ -142,7 +142,7 @@ public class UnifiedGenotyperEngine {
*/
public VariantCallContext calculateLikelihoodsAndGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
Map<String, StratifiedAlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext);
VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE, null);
if ( vc == null )
return null;
@ -157,15 +157,16 @@ public class UnifiedGenotyperEngine {
* @param tracker the meta data tracker
* @param refContext the reference base
* @param rawContext contextual information around the locus
* @param alternateAlleleToUse the alternate allele to use, null if not set
* @return the VariantContext object
*/
public VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
public VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Allele alternateAlleleToUse) {
Map<String, StratifiedAlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext);
VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE, alternateAlleleToUse);
return GLsToPLs(vc);
}
private VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, Map<String, StratifiedAlignmentContext> stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType type) {
private VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, Map<String, StratifiedAlignmentContext> stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType type, Allele alternateAlleleToUse) {
if ( stratifiedContexts == null )
return null;
@ -175,7 +176,7 @@ public class UnifiedGenotyperEngine {
}
Map<String, BiallelicGenotypeLikelihoods> GLs = new HashMap<String, BiallelicGenotypeLikelihoods>();
Allele refAllele = glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, type, genotypePriors, GLs);
Allele refAllele = glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, type, genotypePriors, GLs, alternateAlleleToUse);
if (refAllele != null)
return createVariantContextFromLikelihoods(refContext, refAllele, GLs);
@ -332,7 +333,7 @@ public class UnifiedGenotyperEngine {
if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
// the forward lod
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD);
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD, vc.getAlternateAllele(0));
clearAFarray(log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(tracker, refContext, vcForward.getGenotypes(), log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get());
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
@ -341,7 +342,7 @@ public class UnifiedGenotyperEngine {
if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
// the reverse lod
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE);
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE, vc.getAlternateAllele(0));
clearAFarray(log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(tracker, refContext, vcReverse.getGenotypes(), log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get());
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
@ -574,8 +575,10 @@ public class UnifiedGenotyperEngine {
// all bits are set to false by default
BitSet bitset = new BitSet(record.getReadLength());
// if the mapping quality is too low or the mate is bad, we can just zero out the whole read and continue
// if the mapping quality is too low or the mate is bad, we can just zero out the whole read and continue.
// note that, because we cap the base quality by the mapping quality, if MQ < minBQ then we can filter this read.
if ( record.getMappingQuality() < UAC.MIN_MAPPING_QUALTY_SCORE ||
record.getMappingQuality() < UAC.MIN_BASE_QUALTY_SCORE ||
(!UAC.USE_BADLY_MATED_READS && BadMateFilter.hasBadMate(record)) ) {
return bitset;
}