From 222cd42ceb8e2162e6824f7f986d57ab9c1cfdb3 Mon Sep 17 00:00:00 2001 From: ebanks Date: Mon, 29 Nov 2010 20:28:16 +0000 Subject: [PATCH] Have the UG engine take care of the GL to PL conversion. Note that we still use GLs for calling (since we are losing precision in high-pass and, even worse, it can affect QD), but we emit PLs in all cases. This means that calculating the GLs, emitting them to VCF, and then calling off of them (a la samtools) is absolutely, positively not ideal. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4745 348d0f76-0448-11de-a6fe-93d51630548a --- .../genotyper/ExactAFCalculationModel.java | 9 +------ .../genotyper/GridSearchAFEstimation.java | 8 +------ .../genotyper/UnifiedGenotyperEngine.java | 24 +++++++++++++++++-- .../walkers/genotyper/UnifiedGenotyperV2.java | 1 - 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java index 22e5bd78a..2486d57ab 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -28,9 +28,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.genotyper; import org.apache.log4j.Logger; import org.broad.tribble.util.variantcontext.Allele; import org.broad.tribble.util.variantcontext.Genotype; -import org.broad.tribble.util.variantcontext.GenotypeLikelihoods; import org.broad.tribble.util.variantcontext.VariantContext; -import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -41,7 +39,6 @@ import java.io.PrintStream; public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { - private boolean DEBUGOUT = false; private boolean SIMPLE_GREEDY_GENOTYPER = false; private static final double[] log10Cache; private static final double[] jacobianLogTable; @@ -330,11 +327,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { qual = -1.0 * Math.log10(1.0 - chosenGenotype); } - HashMap attributes = new HashMap(g.getAttributes()); - attributes.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY); - attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.GLsToPLs(likelihoods)); - - calls.put(sample, new Genotype(sample, myAlleles, qual, null, attributes, false)); + calls.put(sample, new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false)); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/GridSearchAFEstimation.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/GridSearchAFEstimation.java index 642ca1106..c466341b6 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/GridSearchAFEstimation.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/GridSearchAFEstimation.java @@ -28,9 +28,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.genotyper; import org.apache.log4j.Logger; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.Allele; -import org.broad.tribble.util.variantcontext.GenotypeLikelihoods; import org.broad.tribble.util.variantcontext.VariantContext; -import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.collections.Pair; @@ -121,11 +119,7 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel { myAlleles.add(altAllele); } - HashMap attributes = new HashMap(g.getAttributes()); - attributes.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY); - attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.GLsToPLs(g.getLikelihoods().getAsVector())); - - calls.put(sample, new Genotype(sample, myAlleles, AFbasedGenotype.second, null, attributes, false)); + calls.put(sample, new Genotype(sample, myAlleles, AFbasedGenotype.second, null, g.getAttributes(), false)); } return calls; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 7d6a26e62..120a28d02 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -144,7 +144,12 @@ public class UnifiedGenotyperEngine { public VariantCallContext calculateLikelihoodsAndGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) { Map stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext); VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE); - return vc == null ? null : calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc); + if ( vc == null ) + return null; + + VariantCallContext vcc = calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc); + vcc.vc = GLsToPLs(vcc.vc); + return vcc; } /** @@ -157,7 +162,8 @@ public class UnifiedGenotyperEngine { */ public VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) { Map stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext); - return calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE); + VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE); + return GLsToPLs(vc); } private VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, Map stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType type) { @@ -214,6 +220,20 @@ public class UnifiedGenotyperEngine { null); } + private static VariantContext GLsToPLs(VariantContext vc) { + if ( vc == null ) + return null; + + HashMap calls = new HashMap(); + for ( Map.Entry genotype : vc.getGenotypes().entrySet() ) { + HashMap attributes = new HashMap(genotype.getValue().getAttributes()); + attributes.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY); + attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.GLsToPLs(genotype.getValue().getLikelihoods().getAsVector())); + calls.put(genotype.getKey(), Genotype.modifyAttributes(genotype.getValue(), attributes)); + } + return VariantContext.modifyGenotypes(vc, calls); + } + /** * Compute genotypes at a given locus. * diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperV2.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperV2.java index c875865e9..4b65c8b66 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperV2.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperV2.java @@ -153,7 +153,6 @@ public class UnifiedGenotyperV2 extends LocusWalker