Have the UG engine take care of the GL to PL conversion. Note that we still use GLs for calling (since we are losing precision in high-pass and, even worse, it can affect QD), but we emit PLs in all cases. This means that calculating the GLs, emitting them to VCF, and then calling off of them (a la samtools) is absolutely, positively not ideal.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4745 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-11-29 20:28:16 +00:00
parent 102c8b1f59
commit 222cd42ceb
4 changed files with 24 additions and 18 deletions

View File

@ -28,9 +28,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.genotyper;
import org.apache.log4j.Logger;
import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.GenotypeLikelihoods;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.VCFConstants;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -41,7 +39,6 @@ import java.io.PrintStream;
public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
private boolean DEBUGOUT = false;
private boolean SIMPLE_GREEDY_GENOTYPER = false;
private static final double[] log10Cache;
private static final double[] jacobianLogTable;
@ -330,11 +327,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
qual = -1.0 * Math.log10(1.0 - chosenGenotype);
}
HashMap<String, Object> attributes = new HashMap<String, Object>(g.getAttributes());
attributes.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.GLsToPLs(likelihoods));
calls.put(sample, new Genotype(sample, myAlleles, qual, null, attributes, false));
calls.put(sample, new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false));
}

View File

@ -28,9 +28,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.genotyper;
import org.apache.log4j.Logger;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.util.variantcontext.GenotypeLikelihoods;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.VCFConstants;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.collections.Pair;
@ -121,11 +119,7 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel {
myAlleles.add(altAllele);
}
HashMap<String, Object> attributes = new HashMap<String, Object>(g.getAttributes());
attributes.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.GLsToPLs(g.getLikelihoods().getAsVector()));
calls.put(sample, new Genotype(sample, myAlleles, AFbasedGenotype.second, null, attributes, false));
calls.put(sample, new Genotype(sample, myAlleles, AFbasedGenotype.second, null, g.getAttributes(), false));
}
return calls;

View File

@ -144,7 +144,12 @@ public class UnifiedGenotyperEngine {
public VariantCallContext calculateLikelihoodsAndGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
Map<String, StratifiedAlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext);
VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
return vc == null ? null : calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc);
if ( vc == null )
return null;
VariantCallContext vcc = calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc);
vcc.vc = GLsToPLs(vcc.vc);
return vcc;
}
/**
@ -157,7 +162,8 @@ public class UnifiedGenotyperEngine {
*/
public VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
Map<String, StratifiedAlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext);
return calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
return GLsToPLs(vc);
}
private VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, Map<String, StratifiedAlignmentContext> stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType type) {
@ -214,6 +220,20 @@ public class UnifiedGenotyperEngine {
null);
}
private static VariantContext GLsToPLs(VariantContext vc) {
if ( vc == null )
return null;
HashMap<String, Genotype> calls = new HashMap<String, Genotype>();
for ( Map.Entry<String, Genotype> genotype : vc.getGenotypes().entrySet() ) {
HashMap<String, Object> attributes = new HashMap<String, Object>(genotype.getValue().getAttributes());
attributes.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.GLsToPLs(genotype.getValue().getLikelihoods().getAsVector()));
calls.put(genotype.getKey(), Genotype.modifyAttributes(genotype.getValue(), attributes));
}
return VariantContext.modifyGenotypes(vc, calls);
}
/**
* Compute genotypes at a given locus.
*

View File

@ -153,7 +153,6 @@ public class UnifiedGenotyperV2 extends LocusWalker<VariantCallContext, UnifiedG
}
// FORMAT and INFO fields
// TODO: if only outputting GLs, change this to the GL version
headerInfo.addAll(VCFUtils.getSupportedHeaderStrings(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY));
// FILTER fields