From 97b191f5787638223d858f6f6cdd6099d0166396 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 20 Aug 2012 01:16:23 -0400 Subject: [PATCH] Thanks to Guillermo I was able to isolate an instance of where the MLEAC > AN. It turns out that this is valid, e.g. when PLs are all 0s for a sample we no-call it but it's allowed to factor into the MLE (since that's the contract with the exact model). Removing the check in UG and instead protecting for it in the AlleleCount stratification. --- .../gatk/walkers/genotyper/UnifiedGenotyperEngine.java | 10 ++-------- .../varianteval/stratifications/AlleleCount.java | 5 +++-- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 67ade390f..3d9724ffb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -444,16 +444,10 @@ public class UnifiedGenotyperEngine { if ( alleleCountsofMLE.size() > 0 ) { attributes.put(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCountsofMLE); final int AN = builder.make().getCalledChrCount(); - - // let's sanity check that we don't have an invalid MLE value in there - for ( int MLEAC : alleleCountsofMLE ) { - if ( MLEAC > AN ) - throw new ReviewedStingException(String.format("MLEAC value (%d) is larger than AN (%d) at position %s:%d", MLEAC, AN, loc.getContig(), loc.getStart())); - } - final ArrayList MLEfrequencies = new ArrayList(alleleCountsofMLE.size()); + // the MLEAC is allowed to be larger than the AN (e.g. in the case of all PLs being 0, the GT is ./. but the exact model may arbitrarily choose an AC>1) for ( int AC : alleleCountsofMLE ) - MLEfrequencies.add((double)AC / (double)AN); + MLEfrequencies.add(Math.min(1.0, (double)AC / (double)AN)); attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java index 00a593768..2b1bd9c62 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java @@ -46,7 +46,8 @@ public class AlleleCount extends VariantStratifier { int AC = 0; // by default, the site is considered monomorphic if ( eval.hasAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY) && eval.isBiallelic() ) { - AC = eval.getAttributeAsInt(VCFConstants.MLE_ALLELE_COUNT_KEY, 0); + // the MLEAC is allowed to be larger than the AN (e.g. in the case of all PLs being 0, the GT is ./. but the exact model may arbitrarily choose an AC>1) + AC = Math.min(eval.getAttributeAsInt(VCFConstants.MLE_ALLELE_COUNT_KEY, 0), nchrom); } else if ( eval.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) && eval.isBiallelic() ) { AC = eval.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0); } else if ( eval.isVariant() ) { @@ -56,7 +57,7 @@ public class AlleleCount extends VariantStratifier { // make sure that the AC isn't invalid if ( AC > nchrom ) - throw new UserException.MalformedVCF(String.format("The AC or MLEAC value (%d) at position %s:%d " + + throw new UserException.MalformedVCF(String.format("The AC value (%d) at position %s:%d " + "is larger than the number of chromosomes over all samples (%d)", AC, eval.getChr(), eval.getStart(), nchrom));