From 7f9f58dbd1c357ca07cae22ea875505b4797ebf8 Mon Sep 17 00:00:00 2001 From: Laura Gauthier Date: Thu, 27 Feb 2014 15:48:40 -0500 Subject: [PATCH 01/10] Added hidden flag to GenotypeConcordance to output sites of discordant genotypes (to System.out) Revised ConcondanceMetrics tests to adapt to change Added comments to PosteriorLikelihoodsUtils --- .../PosteriorLikelihoodsUtils.java | 33 ++++++++++++++++--- .../ConcordanceMetricsUnitTest.java | 22 ++++++------- .../variantutils/ConcordanceMetrics.java | 24 +++++++++++--- .../variantutils/GenotypeConcordance.java | 21 ++++++++---- 4 files changed, 75 insertions(+), 25 deletions(-) diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtils.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtils.java index 6f56415f7..4b81c199f 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtils.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtils.java @@ -67,14 +67,18 @@ public class PosteriorLikelihoodsUtils { throw new IllegalArgumentException("EM loop for posterior GLs not yet implemented"); final Map totalAlleleCounts = new HashMap<>(); + + //store the allele counts for each allele in the variant priors for ( final VariantContext resource : resources ) { addAlleleCounts(totalAlleleCounts,resource,useAC); } + //add the allele counts from the input samples (if applicable) if ( useInputSamples ) { addAlleleCounts(totalAlleleCounts,vc1,useAC); } + //add zero allele counts for any reference alleles not seen in priors (if applicable) totalAlleleCounts.put(vc1.getReference(),totalAlleleCounts.get(vc1.getReference())+numRefSamplesFromMissingResources); // now extract the counts of the alleles present within vc1, and in order @@ -86,6 +90,7 @@ public class PosteriorLikelihoodsUtils { totalAlleleCounts.get(allele) : 0 ); } + //parse the likelihoods for each sample's genotype final List likelihoods = new ArrayList<>(vc1.getNSamples()); for ( final Genotype genotype : vc1.getGenotypes() ) { likelihoods.add(genotype.hasLikelihoods() ? genotype.getLikelihoods().getAsVector() : null ); @@ -196,13 +201,24 @@ public class PosteriorLikelihoodsUtils { return priors; } + /** + * Parse counts for each allele + * @param counts - Map to store and return data + * @param context - line to be parsed from the input VCF file + * @param useAC - use allele count annotation value from VariantContext (vs. MLEAC) + */ private static void addAlleleCounts(final Map counts, final VariantContext context, final boolean useAC) { final int[] ac; + //use MLEAC value... if ( context.hasAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY) && ! useAC ) { ac = extractInts(context.getAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY)); - } else if ( context.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) { + } + //...unless specified by the user in useAC + else if ( context.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) { ac = extractInts(context.getAttribute(VCFConstants.ALLELE_COUNT_KEY)); - } else { + } + //if VariantContext annotation doesn't contain AC/MLEAC then get the data from another field + else { ac = new int[context.getAlternateAlleles().size()]; int idx = 0; for ( final Allele allele : context.getAlternateAlleles() ) { @@ -210,24 +226,33 @@ public class PosteriorLikelihoodsUtils { } } + //since the allele count for the reference allele is not given in the VCF format, + //calculate it from the allele number minus the total counts for alternate alleles for ( final Allele allele : context.getAlleles() ) { final int count; if ( allele.isReference() ) { if ( context.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) ) { - count = context.getAttributeAsInt(VCFConstants.ALLELE_NUMBER_KEY,-1) - (int) MathUtils.sum(ac); + count = Math.max(context.getAttributeAsInt(VCFConstants.ALLELE_NUMBER_KEY,-1) - (int) MathUtils.sum(ac),0); //occasionally an MLEAC value will sneak in that's greater than the AN } else { - count = context.getCalledChrCount() - (int) MathUtils.sum(ac); + count = Math.max(context.getCalledChrCount() - (int) MathUtils.sum(ac),0); } } else { count = ac[context.getAlternateAlleles().indexOf(allele)]; } + //if this allele isn't in the map yet, add it if ( ! counts.containsKey(allele) ) { counts.put(allele,0); } + //add the count for the current allele to the existing value in the map counts.put(allele,count + counts.get(allele)); } } + /** + * Check the formatting on the Object returned by a call to VariantContext::getAttribute() and parse appropriately + * @param integerListContainingVCField - Object returned by a call to VariantContext::getAttribute() + * @return - array of ints + */ public static int[] extractInts(final Object integerListContainingVCField) { List mleList = null; if ( integerListContainingVCField instanceof List ) { diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java index 50c896450..6ece527ce 100755 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java @@ -135,7 +135,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest { VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); - ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false); metrics.update(eval,truth); Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2); Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),1); @@ -185,7 +185,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest { VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); - ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false); metrics.update(eval,truth); Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2); Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),2); @@ -205,7 +205,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest { codec = new VCFCodec(); evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); - metrics = new ConcordanceMetrics(evalHeader,compHeader); + metrics = new ConcordanceMetrics(evalHeader,compHeader,false); metrics.update(eval,truth); Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2); Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),2); @@ -260,7 +260,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest { VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); - ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false); metrics.update(eval,truth); Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample1").getnMismatchingAlt(),1); Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0); @@ -313,7 +313,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest { VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); - ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false); metrics.update(eval,truth); Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0); Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0); @@ -362,7 +362,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest { VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); - ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false); metrics.update(eval,truth); Assert.assertTrue(eval.getGenotype("test1_sample2").getType().equals(GenotypeType.UNAVAILABLE)); Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0); @@ -516,7 +516,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest { VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER)))); VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER)))); - ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false); for ( Pair contextPair : data ) { VariantContext eval = contextPair.getFirst(); @@ -550,7 +550,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest { VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); - ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false); int[][] table = metrics.getOverallGenotypeConcordance().getTable(); // set up the table table[0] = new int[] {30, 12, 7, 5, 6, 0}; @@ -585,8 +585,8 @@ public class ConcordanceMetricsUnitTest extends BaseTest { VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_1)))); VCFHeader disjointCompHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_2)))); VCFHeader overlapCompHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_3)))); - ConcordanceMetrics disjointMetrics = new ConcordanceMetrics(evalHeader,disjointCompHeader); - ConcordanceMetrics overlapMetrics = new ConcordanceMetrics(evalHeader,overlapCompHeader); + ConcordanceMetrics disjointMetrics = new ConcordanceMetrics(evalHeader,disjointCompHeader,false); + ConcordanceMetrics overlapMetrics = new ConcordanceMetrics(evalHeader,overlapCompHeader,false); // test what happens if you put in disjoint sets and start making requests Assert.assertEquals(0,disjointMetrics.getPerSampleGenotypeConcordance().size()); @@ -716,7 +716,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest { VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER)))); VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER)))); - ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false); List> data = getData7(); diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java index 848261d73..79e0e8f9c 100755 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java @@ -42,8 +42,9 @@ public class ConcordanceMetrics { private Map perSampleGenotypeConcordance; private GenotypeConcordanceTable overallGenotypeConcordance; private SiteConcordanceTable overallSiteConcordance; + private Boolean printInterestingSites; - public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth) { + public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth, Boolean printSitesEnabled) { HashSet overlappingSamples = new HashSet(evaluate.getGenotypeSamples()); overlappingSamples.retainAll(truth.getGenotypeSamples()); perSampleGenotypeConcordance = new HashMap(overlappingSamples.size()); @@ -52,6 +53,7 @@ public class ConcordanceMetrics { } overallGenotypeConcordance = new GenotypeConcordanceTable(); overallSiteConcordance = new SiteConcordanceTable(); + printInterestingSites = printSitesEnabled; } public GenotypeConcordanceTable getOverallGenotypeConcordance() { @@ -114,6 +116,7 @@ public class ConcordanceMetrics { @Requires({"eval != null","truth != null"}) public void update(VariantContext eval, VariantContext truth) { + Boolean doPrint = false; overallSiteConcordance.update(eval,truth); Set alleleTruth = new HashSet(8); String truthRef = truth.getReference().getBaseString(); @@ -130,7 +133,12 @@ public class ConcordanceMetrics { throw new UserException(String.format("Concordance Metrics is currently only implemented for DIPLOID genotypes, found eval ploidy: %d, comp ploidy: %d",evalGenotype.getPloidy(),truthGenotype.getPloidy())); } perSampleGenotypeConcordance.get(sample).update(evalGenotype,truthGenotype,alleleTruth,truthRef); - overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth,truthRef); + doPrint = overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth,truthRef); + if(printInterestingSites && doPrint) + System.out.println(eval.getChr() + ":" + eval.getStart() + "\t truth is:" + truthGenotype.getType() + "\t eval is:" + evalGenotype.getType()); + + //Below is code to print out mismatched alternate alleles + //System.out.println(eval.getChr() + ":" + eval.getStart() + "\t truth is:" + truthGenotype.getAlleles() + "\t eval is:" + evalGenotype.getAlleles()); } } @@ -212,13 +220,14 @@ public class ConcordanceMetrics { } @Requires({"eval!=null","truth != null","truthAlleles != null"}) - public void update(Genotype eval, Genotype truth, Set truthAlleles, String truthRef) { + public Boolean update(Genotype eval, Genotype truth, Set truthAlleles, String truthRef) { // this is slow but correct. // NOTE: a reference call in "truth" is a special case, the eval can match *any* of the truth alleles // that is, if the reference base is C, and a sample is C/C in truth, A/C, A/A, T/C, T/T will // all match, so long as A and T are alleles in the truth callset. boolean matchingAlt = true; + int evalGT, truthGT; if ( eval.isCalled() && truth.isCalled() && truth.isHomRef() ) { // by default, no-calls "match" between alleles, so if // one or both sites are no-call or unavailable, the alt alleles match @@ -241,10 +250,17 @@ public class ConcordanceMetrics { } if ( matchingAlt ) { - genotypeCounts[eval.getType().ordinal()][truth.getType().ordinal()]++; + evalGT = eval.getType().ordinal(); + truthGT = truth.getType().ordinal(); + genotypeCounts[evalGT][truthGT]++; + if(evalGT != truthGT) //report variants where genotypes don't match + return true; } else { nMismatchingAlt++; + return false; + //return true; //alternatively, report variants where alt alleles don't match } + return false; } public int[][] getTable() { diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java index 8c8961cb5..08c938583 100755 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java @@ -25,10 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -213,6 +210,16 @@ public class GenotypeConcordance extends RodWalker> evalCompList, ConcordanceMetrics metrics) { - for ( Pair evalComp : evalCompList) + for ( Pair evalComp : evalCompList){ metrics.update(evalComp.getFirst(),evalComp.getSecond()); + + } return metrics; } From 43fdd38342a291589289dde97badc066ff31c908 Mon Sep 17 00:00:00 2001 From: Laura Gauthier Date: Tue, 18 Feb 2014 11:44:19 -0500 Subject: [PATCH 02/10] Add error handling to CalculateGenotypePosteriors to catch multiallelic variants with wrong number of ACs -- throws UserException; added tests in PosteriorLikelihoodsUtilsUnitTests Add error handling to CalculateGenotypePosteriors for cases where MLEAC>AN; add tests in PosteriorLikelihoodsUtilsUnitTests Add unit tests to confirm that CalculateGenotypePosteriors has the ability to switch genotypes for four cases --- .../PosteriorLikelihoodsUtils.java | 28 +++++- .../PosteriorLikelihoodsUtilsUnitTest.java | 96 +++++++++++++++++++ .../variantutils/ConcordanceMetrics.java | 6 +- 3 files changed, 123 insertions(+), 7 deletions(-) diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtils.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtils.java index 4b81c199f..d9b0c575e 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtils.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtils.java @@ -48,6 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.vcf.VCFConstants; @@ -211,13 +212,13 @@ public class PosteriorLikelihoodsUtils { final int[] ac; //use MLEAC value... if ( context.hasAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY) && ! useAC ) { - ac = extractInts(context.getAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY)); + ac = getAlleleCounts(VCFConstants.MLE_ALLELE_COUNT_KEY, context); } - //...unless specified by the user in useAC + //...unless specified by the user in useAC or unless MLEAC is absent else if ( context.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) { - ac = extractInts(context.getAttribute(VCFConstants.ALLELE_COUNT_KEY)); + ac = getAlleleCounts(VCFConstants.ALLELE_COUNT_KEY, context); } - //if VariantContext annotation doesn't contain AC/MLEAC then get the data from another field + //if VariantContext annotation doesn't contain AC or MLEAC then get the data from direct evaluation else { ac = new int[context.getAlternateAlleles().size()]; int idx = 0; @@ -248,6 +249,25 @@ public class PosteriorLikelihoodsUtils { } } + /** + * Retrieve allele count data from VariantContext using VCFkey, checks for correct number of values in VCF + * @param VCFkey VariantContext annotation tag of interest (should be AC or MLEAC) + * @param context VariantContext from which to extract the data + * @return int[] with allele count data + */ + private static int[] getAlleleCounts(final String VCFkey, final VariantContext context) { + final Object alleleCountsFromVCF = context.getAttribute(VCFkey); + if ( alleleCountsFromVCF instanceof List ) { + if ( ((List) alleleCountsFromVCF).size() != context.getAlternateAlleles().size() ) + throw new UserException(String.format("Variant does not contain the same number of MLE allele counts as alternate alleles for record at %s:%d", context.getChr(), context.getStart())); + } + else if ( alleleCountsFromVCF instanceof String || alleleCountsFromVCF instanceof Integer) {//here length is 1 + if (context.getAlternateAlleles().size() != 1) + throw new UserException(String.format("Variant does not contain the same number of MLE allele counts as alternate alleles for record at %s:%d", context.getChr(), context.getStart())); + } + return extractInts(alleleCountsFromVCF); + } + /** * Check the formatting on the Object returned by a call to VariantContext::getAttribute() and parse appropriately * @param integerListContainingVCField - Object returned by a call to VariantContext::getAttribute() diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtilsUnitTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtilsUnitTest.java index 0eca18c46..87f664905 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtilsUnitTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtilsUnitTest.java @@ -52,6 +52,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; * Date: 12/8/13 */ +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.variant.variantcontext.*; @@ -192,6 +193,101 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest { Assert.assertEquals(arraysEq(test2exp1.getPL(),_mleparse((List) test2result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY))), ""); } + @Test + private void testCalculatePosteriorHOM_VARtoHET() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,1,0)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + + int[] GP = _mleparse( (List)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); + Assert.assertTrue(GP[2] > GP[1]); + } + + @Test + private void testCalculatePosteriorHETtoHOM_VAR() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,0,1)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,900).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + + int[] GP = _mleparse( (List)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); + Assert.assertTrue(GP[2] < GP[1]); + } + + @Test + private void testCalculatePosteriorHOM_REFtoHET() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,0,1,40)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + + int[] GP = _mleparse( (List)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); + Assert.assertTrue(GP[0] > GP[1]); + } + + @Test + private void testCalculatePosteriorHETtoHOM_REF() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,1,0,40)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,100).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + + int[] GP = _mleparse( (List)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); + Assert.assertTrue(GP[0] < GP[1]); + } + + @Test + private void testMLEACgreaterThanAN() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0), + makeG("s2",Aref,T,18,0,24), + makeG("s3",Aref,T,22,0,12)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,11).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + } + + @Test (expectedExceptions = {UserException.class}) + private void testWrongNumberACvalues() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0), + makeG("s2",Aref,T,18,0,24), + makeG("s3",Aref,T,22,0,12)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); + + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + } + + @Test (expectedExceptions = {UserException.class}) + private void testWrongNumberMLEACvalues() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0), + makeG("s2",Aref,T,18,0,24), + makeG("s3",Aref,T,22,0,12)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + } + + @Test + private void testMultipleACvalues() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0), + makeG("s2",Aref,T,18,0,24), + makeG("s3",Aref,T,22,0,12)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + } + + @Test + private void testMultipleMLEACvalues() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0), + makeG("s2",Aref,T,18,0,24), + makeG("s3",Aref,T,22,0,12)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + } + private double[] pl2gl(int[] pl) { double[] gl = new double[pl.length]; for ( int idx = 0; idx < gl.length; idx++ ) { diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java index 79e0e8f9c..395e24604 100755 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java @@ -42,9 +42,9 @@ public class ConcordanceMetrics { private Map perSampleGenotypeConcordance; private GenotypeConcordanceTable overallGenotypeConcordance; private SiteConcordanceTable overallSiteConcordance; - private Boolean printInterestingSites; + private boolean printInterestingSites; - public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth, Boolean printSitesEnabled) { + public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth, boolean printSitesEnabled) { HashSet overlappingSamples = new HashSet(evaluate.getGenotypeSamples()); overlappingSamples.retainAll(truth.getGenotypeSamples()); perSampleGenotypeConcordance = new HashMap(overlappingSamples.size()); @@ -116,7 +116,7 @@ public class ConcordanceMetrics { @Requires({"eval != null","truth != null"}) public void update(VariantContext eval, VariantContext truth) { - Boolean doPrint = false; + boolean doPrint = false; overallSiteConcordance.update(eval,truth); Set alleleTruth = new HashSet(8); String truthRef = truth.getReference().getBaseString(); From a9ddfdb7c002e16741bb46ebf093b93b79b74604 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Wed, 5 Mar 2014 20:25:38 -0500 Subject: [PATCH 04/10] Remove external-example module from public pom.xml This module was causing failures during the release packaging tests. After discussing with Khalid, we've decided to disable it for now until a fix can be developed. --- public/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/pom.xml b/public/pom.xml index 89d49997c..3840b48aa 100644 --- a/public/pom.xml +++ b/public/pom.xml @@ -19,7 +19,7 @@ sting-utils gatk-framework gatk-package - external-example + From 34edcb8ddf8b0f12364dcd4aa1eafb989a895bed Mon Sep 17 00:00:00 2001 From: David Roazen Date: Wed, 5 Mar 2014 23:37:21 -0500 Subject: [PATCH 05/10] Update pom versions for the 3.0 release --- pom.xml | 2 +- protected/gatk-protected/pom.xml | 2 +- protected/pom.xml | 2 +- public/external-example/pom.xml | 2 +- public/gatk-framework/pom.xml | 2 +- public/gatk-package/pom.xml | 2 +- public/gatk-queue-extgen/pom.xml | 2 +- public/gsalib/pom.xml | 2 +- public/package-tests/pom.xml | 2 +- public/pom.xml | 2 +- public/queue-framework/pom.xml | 2 +- public/queue-package/pom.xml | 2 +- public/sting-root/pom.xml | 2 +- public/sting-utils/pom.xml | 2 +- 14 files changed, 14 insertions(+), 14 deletions(-) diff --git a/pom.xml b/pom.xml index 96074d881..d899506b5 100644 --- a/pom.xml +++ b/pom.xml @@ -13,7 +13,7 @@ org.broadinstitute.sting sting-root - 2.8-SNAPSHOT + 3.0 public/sting-root diff --git a/protected/gatk-protected/pom.xml b/protected/gatk-protected/pom.xml index d75c5b056..26aabd187 100644 --- a/protected/gatk-protected/pom.xml +++ b/protected/gatk-protected/pom.xml @@ -5,7 +5,7 @@ org.broadinstitute.sting sting-aggregator - 2.8-SNAPSHOT + 3.0 ../.. diff --git a/protected/pom.xml b/protected/pom.xml index 4b165d477..8a9646438 100644 --- a/protected/pom.xml +++ b/protected/pom.xml @@ -5,7 +5,7 @@ org.broadinstitute.sting sting-root - 2.8-SNAPSHOT + 3.0 ../public/sting-root diff --git a/public/external-example/pom.xml b/public/external-example/pom.xml index bdfeb099f..9c05867a8 100644 --- a/public/external-example/pom.xml +++ b/public/external-example/pom.xml @@ -9,7 +9,7 @@ GATK External Example - 2.8-SNAPSHOT + 3.0