From 43fdd38342a291589289dde97badc066ff31c908 Mon Sep 17 00:00:00 2001 From: Laura Gauthier Date: Tue, 18 Feb 2014 11:44:19 -0500 Subject: [PATCH] Add error handling to CalculateGenotypePosteriors to catch multiallelic variants with wrong number of ACs -- throws UserException; added tests in PosteriorLikelihoodsUtilsUnitTests Add error handling to CalculateGenotypePosteriors for cases where MLEAC>AN; add tests in PosteriorLikelihoodsUtilsUnitTests Add unit tests to confirm that CalculateGenotypePosteriors has the ability to switch genotypes for four cases --- .../PosteriorLikelihoodsUtils.java | 28 +++++- .../PosteriorLikelihoodsUtilsUnitTest.java | 96 +++++++++++++++++++ .../variantutils/ConcordanceMetrics.java | 6 +- 3 files changed, 123 insertions(+), 7 deletions(-) diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtils.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtils.java index 4b81c199f..d9b0c575e 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtils.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtils.java @@ -48,6 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.vcf.VCFConstants; @@ -211,13 +212,13 @@ public class PosteriorLikelihoodsUtils { final int[] ac; //use MLEAC value... if ( context.hasAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY) && ! useAC ) { - ac = extractInts(context.getAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY)); + ac = getAlleleCounts(VCFConstants.MLE_ALLELE_COUNT_KEY, context); } - //...unless specified by the user in useAC + //...unless specified by the user in useAC or unless MLEAC is absent else if ( context.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) { - ac = extractInts(context.getAttribute(VCFConstants.ALLELE_COUNT_KEY)); + ac = getAlleleCounts(VCFConstants.ALLELE_COUNT_KEY, context); } - //if VariantContext annotation doesn't contain AC/MLEAC then get the data from another field + //if VariantContext annotation doesn't contain AC or MLEAC then get the data from direct evaluation else { ac = new int[context.getAlternateAlleles().size()]; int idx = 0; @@ -248,6 +249,25 @@ public class PosteriorLikelihoodsUtils { } } + /** + * Retrieve allele count data from VariantContext using VCFkey, checks for correct number of values in VCF + * @param VCFkey VariantContext annotation tag of interest (should be AC or MLEAC) + * @param context VariantContext from which to extract the data + * @return int[] with allele count data + */ + private static int[] getAlleleCounts(final String VCFkey, final VariantContext context) { + final Object alleleCountsFromVCF = context.getAttribute(VCFkey); + if ( alleleCountsFromVCF instanceof List ) { + if ( ((List) alleleCountsFromVCF).size() != context.getAlternateAlleles().size() ) + throw new UserException(String.format("Variant does not contain the same number of MLE allele counts as alternate alleles for record at %s:%d", context.getChr(), context.getStart())); + } + else if ( alleleCountsFromVCF instanceof String || alleleCountsFromVCF instanceof Integer) {//here length is 1 + if (context.getAlternateAlleles().size() != 1) + throw new UserException(String.format("Variant does not contain the same number of MLE allele counts as alternate alleles for record at %s:%d", context.getChr(), context.getStart())); + } + return extractInts(alleleCountsFromVCF); + } + /** * Check the formatting on the Object returned by a call to VariantContext::getAttribute() and parse appropriately * @param integerListContainingVCField - Object returned by a call to VariantContext::getAttribute() diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtilsUnitTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtilsUnitTest.java index 0eca18c46..87f664905 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtilsUnitTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtilsUnitTest.java @@ -52,6 +52,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; * Date: 12/8/13 */ +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.variant.variantcontext.*; @@ -192,6 +193,101 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest { Assert.assertEquals(arraysEq(test2exp1.getPL(),_mleparse((List) test2result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY))), ""); } + @Test + private void testCalculatePosteriorHOM_VARtoHET() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,1,0)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + + int[] GP = _mleparse( (List)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); + Assert.assertTrue(GP[2] > GP[1]); + } + + @Test + private void testCalculatePosteriorHETtoHOM_VAR() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,0,1)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,900).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + + int[] GP = _mleparse( (List)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); + Assert.assertTrue(GP[2] < GP[1]); + } + + @Test + private void testCalculatePosteriorHOM_REFtoHET() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,0,1,40)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + + int[] GP = _mleparse( (List)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); + Assert.assertTrue(GP[0] > GP[1]); + } + + @Test + private void testCalculatePosteriorHETtoHOM_REF() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,1,0,40)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,100).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + + int[] GP = _mleparse( (List)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); + Assert.assertTrue(GP[0] < GP[1]); + } + + @Test + private void testMLEACgreaterThanAN() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0), + makeG("s2",Aref,T,18,0,24), + makeG("s3",Aref,T,22,0,12)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,11).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + } + + @Test (expectedExceptions = {UserException.class}) + private void testWrongNumberACvalues() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0), + makeG("s2",Aref,T,18,0,24), + makeG("s3",Aref,T,22,0,12)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); + + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + } + + @Test (expectedExceptions = {UserException.class}) + private void testWrongNumberMLEACvalues() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0), + makeG("s2",Aref,T,18,0,24), + makeG("s3",Aref,T,22,0,12)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + } + + @Test + private void testMultipleACvalues() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0), + makeG("s2",Aref,T,18,0,24), + makeG("s3",Aref,T,22,0,12)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + } + + @Test + private void testMultipleMLEACvalues() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0), + makeG("s2",Aref,T,18,0,24), + makeG("s3",Aref,T,22,0,12)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + } + private double[] pl2gl(int[] pl) { double[] gl = new double[pl.length]; for ( int idx = 0; idx < gl.length; idx++ ) { diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java index 79e0e8f9c..395e24604 100755 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java @@ -42,9 +42,9 @@ public class ConcordanceMetrics { private Map perSampleGenotypeConcordance; private GenotypeConcordanceTable overallGenotypeConcordance; private SiteConcordanceTable overallSiteConcordance; - private Boolean printInterestingSites; + private boolean printInterestingSites; - public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth, Boolean printSitesEnabled) { + public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth, boolean printSitesEnabled) { HashSet overlappingSamples = new HashSet(evaluate.getGenotypeSamples()); overlappingSamples.retainAll(truth.getGenotypeSamples()); perSampleGenotypeConcordance = new HashMap(overlappingSamples.size()); @@ -116,7 +116,7 @@ public class ConcordanceMetrics { @Requires({"eval != null","truth != null"}) public void update(VariantContext eval, VariantContext truth) { - Boolean doPrint = false; + boolean doPrint = false; overallSiteConcordance.update(eval,truth); Set alleleTruth = new HashSet(8); String truthRef = truth.getReference().getBaseString();