From 38388232623c3fa4dcc03f517f7f736b24d2b871 Mon Sep 17 00:00:00 2001 From: delangel Date: Fri, 8 Oct 2010 00:53:16 +0000 Subject: [PATCH] Two ugly hopefully temporary fixes for new genotyping model: a) In Indel genotyper: we can't deal yet with extended events correctly and we are still triggering at each extended event which results in repeated records on a vcf. So, to avoid this, keep track of start position of candidate variantes we've visited and if we've visited a variant before we don't do it again. b) Avoid infinite terms in QUAL and in genotype likelihoods which can happen if posterior AF happens to be exactly zero. For now, hard-code a minimum value of each term of the posterior AF likelihood to be -300 (ie 1e-300 in lin space). This can be solved with better and smarter log-to-lin conversions and some precision fixes in AF calculation. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4455 348d0f76-0448-11de-a6fe-93d51630548a --- ...elGenotypeLikelihoodsCalculationModel.java | 8 ++++ .../genotyper/ExactAFCalculationModel.java | 37 ++++++++++++++++--- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java index e03f21334..b1d3e8a12 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java @@ -55,11 +55,13 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo boolean DEBUGOUT = false; private HaplotypeIndelErrorModel model; + private ArrayList sitesVisited; protected DindelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { super(UAC, logger); model = new HaplotypeIndelErrorModel(maxReadDeletionLength, insertionStartProbability, insertionEndProbability, alphaDeletionProbability, HAPLOTYPE_SIZE, false, DEBUGOUT); + sitesVisited = new ArrayList(); } public Allele getLikelihoods(RefMetaDataTracker tracker, @@ -82,9 +84,15 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo if (!vc.isIndel()) return null; + if (sitesVisited.contains(new Integer(vc.getStart()))) + return null; + + sitesVisited.add(new Integer(vc.getStart())); + if ( !(priors instanceof DiploidIndelGenotypePriors) ) throw new StingException("Only diploid-based Indel priors are supported in the DINDEL GL model"); + int eventLength = vc.getReference().getBaseString().length() - vc.getAlternateAllele(0).getBaseString().length(); // assume only one alt allele for now if (eventLength<0) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java index 43d74a055..20b53c185 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -43,7 +43,9 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { private boolean DEBUGOUT = false; private boolean SIMPLE_GREEDY_GENOTYPER = false; - + private static final double EPS = 1e-300; + private static final double LOGEPS = -300; + protected ExactAFCalculationModel(int N, Logger logger, PrintStream verboseWriter) { super(N, logger, verboseWriter); } @@ -60,15 +62,20 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { alleleFrequencyPosteriors = updateAFEstimate(GLs, alleleFrequencyPriors); - double meanAF = computeMeanAF(alleleFrequencyPosteriors); - if (DEBUGOUT) + if (DEBUGOUT) { + double meanAF = computeMeanAF(alleleFrequencyPosteriors); System.out.format("Mean AF: %5.4f. PVariant: %5.5f\n", meanAF,1.0-alleleFrequencyPosteriors[0]); - + } for (int k=0; k < alleleFrequencyPosteriors.length; k++) { - log10AlleleFrequencyPosteriors[k] = Math.log10(alleleFrequencyPosteriors[k]); + if (alleleFrequencyPosteriors[k] > 1-EPS) + log10AlleleFrequencyPosteriors[k] = -EPS; + else if (alleleFrequencyPosteriors[k] < EPS) + log10AlleleFrequencyPosteriors[k] = LOGEPS; + else + log10AlleleFrequencyPosteriors[k] = Math.log10(alleleFrequencyPosteriors[k]); } } @@ -131,6 +138,24 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } + /** + * Can be overridden by concrete subclasses + * @param contexts alignment contexts + * @param GLs genotype likelihoods + * @param log10AlleleFrequencyPosteriors allele frequency results + * @param AFofMaxLikelihood allele frequency of max likelihood + * + * @return calls + */ + public Map assignGenotypes(Map contexts, + Map GLs, + double[] log10AlleleFrequencyPosteriors, + int AFofMaxLikelihood) { + + // overriding implementation in AlleleFrequencyCalculationModel to avoid filling out AF matrix which is not used. + return generateCalls(contexts, GLs, AFofMaxLikelihood); + } + protected Map generateCalls(Map contexts, Map GLs, int bestAFguess) { @@ -242,4 +267,4 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { return calls; } -} \ No newline at end of file +}