diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtils.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtils.java index 6f56415f7..d9b0c575e 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtils.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtils.java @@ -48,6 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.vcf.VCFConstants; @@ -67,14 +68,18 @@ public class PosteriorLikelihoodsUtils { throw new IllegalArgumentException("EM loop for posterior GLs not yet implemented"); final Map totalAlleleCounts = new HashMap<>(); + + //store the allele counts for each allele in the variant priors for ( final VariantContext resource : resources ) { addAlleleCounts(totalAlleleCounts,resource,useAC); } + //add the allele counts from the input samples (if applicable) if ( useInputSamples ) { addAlleleCounts(totalAlleleCounts,vc1,useAC); } + //add zero allele counts for any reference alleles not seen in priors (if applicable) totalAlleleCounts.put(vc1.getReference(),totalAlleleCounts.get(vc1.getReference())+numRefSamplesFromMissingResources); // now extract the counts of the alleles present within vc1, and in order @@ -86,6 +91,7 @@ public class PosteriorLikelihoodsUtils { totalAlleleCounts.get(allele) : 0 ); } + //parse the likelihoods for each sample's genotype final List likelihoods = new ArrayList<>(vc1.getNSamples()); for ( final Genotype genotype : vc1.getGenotypes() ) { likelihoods.add(genotype.hasLikelihoods() ? genotype.getLikelihoods().getAsVector() : null ); @@ -196,13 +202,24 @@ public class PosteriorLikelihoodsUtils { return priors; } + /** + * Parse counts for each allele + * @param counts - Map to store and return data + * @param context - line to be parsed from the input VCF file + * @param useAC - use allele count annotation value from VariantContext (vs. MLEAC) + */ private static void addAlleleCounts(final Map counts, final VariantContext context, final boolean useAC) { final int[] ac; + //use MLEAC value... if ( context.hasAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY) && ! useAC ) { - ac = extractInts(context.getAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY)); - } else if ( context.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) { - ac = extractInts(context.getAttribute(VCFConstants.ALLELE_COUNT_KEY)); - } else { + ac = getAlleleCounts(VCFConstants.MLE_ALLELE_COUNT_KEY, context); + } + //...unless specified by the user in useAC or unless MLEAC is absent + else if ( context.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) { + ac = getAlleleCounts(VCFConstants.ALLELE_COUNT_KEY, context); + } + //if VariantContext annotation doesn't contain AC or MLEAC then get the data from direct evaluation + else { ac = new int[context.getAlternateAlleles().size()]; int idx = 0; for ( final Allele allele : context.getAlternateAlleles() ) { @@ -210,24 +227,52 @@ public class PosteriorLikelihoodsUtils { } } + //since the allele count for the reference allele is not given in the VCF format, + //calculate it from the allele number minus the total counts for alternate alleles for ( final Allele allele : context.getAlleles() ) { final int count; if ( allele.isReference() ) { if ( context.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) ) { - count = context.getAttributeAsInt(VCFConstants.ALLELE_NUMBER_KEY,-1) - (int) MathUtils.sum(ac); + count = Math.max(context.getAttributeAsInt(VCFConstants.ALLELE_NUMBER_KEY,-1) - (int) MathUtils.sum(ac),0); //occasionally an MLEAC value will sneak in that's greater than the AN } else { - count = context.getCalledChrCount() - (int) MathUtils.sum(ac); + count = Math.max(context.getCalledChrCount() - (int) MathUtils.sum(ac),0); } } else { count = ac[context.getAlternateAlleles().indexOf(allele)]; } + //if this allele isn't in the map yet, add it if ( ! counts.containsKey(allele) ) { counts.put(allele,0); } + //add the count for the current allele to the existing value in the map counts.put(allele,count + counts.get(allele)); } } + /** + * Retrieve allele count data from VariantContext using VCFkey, checks for correct number of values in VCF + * @param VCFkey VariantContext annotation tag of interest (should be AC or MLEAC) + * @param context VariantContext from which to extract the data + * @return int[] with allele count data + */ + private static int[] getAlleleCounts(final String VCFkey, final VariantContext context) { + final Object alleleCountsFromVCF = context.getAttribute(VCFkey); + if ( alleleCountsFromVCF instanceof List ) { + if ( ((List) alleleCountsFromVCF).size() != context.getAlternateAlleles().size() ) + throw new UserException(String.format("Variant does not contain the same number of MLE allele counts as alternate alleles for record at %s:%d", context.getChr(), context.getStart())); + } + else if ( alleleCountsFromVCF instanceof String || alleleCountsFromVCF instanceof Integer) {//here length is 1 + if (context.getAlternateAlleles().size() != 1) + throw new UserException(String.format("Variant does not contain the same number of MLE allele counts as alternate alleles for record at %s:%d", context.getChr(), context.getStart())); + } + return extractInts(alleleCountsFromVCF); + } + + /** + * Check the formatting on the Object returned by a call to VariantContext::getAttribute() and parse appropriately + * @param integerListContainingVCField - Object returned by a call to VariantContext::getAttribute() + * @return - array of ints + */ public static int[] extractInts(final Object integerListContainingVCField) { List mleList = null; if ( integerListContainingVCField instanceof List ) { diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java index 50c896450..6ece527ce 100755 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java @@ -135,7 +135,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest { VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); - ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false); metrics.update(eval,truth); Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2); Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),1); @@ -185,7 +185,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest { VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); - ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false); metrics.update(eval,truth); Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2); Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),2); @@ -205,7 +205,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest { codec = new VCFCodec(); evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); - metrics = new ConcordanceMetrics(evalHeader,compHeader); + metrics = new ConcordanceMetrics(evalHeader,compHeader,false); metrics.update(eval,truth); Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2); Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),2); @@ -260,7 +260,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest { VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); - ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false); metrics.update(eval,truth); Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample1").getnMismatchingAlt(),1); Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0); @@ -313,7 +313,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest { VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); - ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false); metrics.update(eval,truth); Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0); Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0); @@ -362,7 +362,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest { VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); - ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false); metrics.update(eval,truth); Assert.assertTrue(eval.getGenotype("test1_sample2").getType().equals(GenotypeType.UNAVAILABLE)); Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0); @@ -516,7 +516,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest { VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER)))); VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER)))); - ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false); for ( Pair contextPair : data ) { VariantContext eval = contextPair.getFirst(); @@ -550,7 +550,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest { VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); - ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false); int[][] table = metrics.getOverallGenotypeConcordance().getTable(); // set up the table table[0] = new int[] {30, 12, 7, 5, 6, 0}; @@ -585,8 +585,8 @@ public class ConcordanceMetricsUnitTest extends BaseTest { VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_1)))); VCFHeader disjointCompHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_2)))); VCFHeader overlapCompHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_3)))); - ConcordanceMetrics disjointMetrics = new ConcordanceMetrics(evalHeader,disjointCompHeader); - ConcordanceMetrics overlapMetrics = new ConcordanceMetrics(evalHeader,overlapCompHeader); + ConcordanceMetrics disjointMetrics = new ConcordanceMetrics(evalHeader,disjointCompHeader,false); + ConcordanceMetrics overlapMetrics = new ConcordanceMetrics(evalHeader,overlapCompHeader,false); // test what happens if you put in disjoint sets and start making requests Assert.assertEquals(0,disjointMetrics.getPerSampleGenotypeConcordance().size()); @@ -716,7 +716,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest { VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER)))); VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER)))); - ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,false); List> data = getData7(); diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtilsUnitTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtilsUnitTest.java index 0eca18c46..87f664905 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtilsUnitTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtilsUnitTest.java @@ -52,6 +52,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; * Date: 12/8/13 */ +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.variant.variantcontext.*; @@ -192,6 +193,101 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest { Assert.assertEquals(arraysEq(test2exp1.getPL(),_mleparse((List) test2result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY))), ""); } + @Test + private void testCalculatePosteriorHOM_VARtoHET() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,1,0)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + + int[] GP = _mleparse( (List)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); + Assert.assertTrue(GP[2] > GP[1]); + } + + @Test + private void testCalculatePosteriorHETtoHOM_VAR() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,0,1)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,900).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + + int[] GP = _mleparse( (List)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); + Assert.assertTrue(GP[2] < GP[1]); + } + + @Test + private void testCalculatePosteriorHOM_REFtoHET() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,0,1,40)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + + int[] GP = _mleparse( (List)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); + Assert.assertTrue(GP[0] > GP[1]); + } + + @Test + private void testCalculatePosteriorHETtoHOM_REF() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,1,0,40)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,100).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + + int[] GP = _mleparse( (List)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); + Assert.assertTrue(GP[0] < GP[1]); + } + + @Test + private void testMLEACgreaterThanAN() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0), + makeG("s2",Aref,T,18,0,24), + makeG("s3",Aref,T,22,0,12)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,11).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + } + + @Test (expectedExceptions = {UserException.class}) + private void testWrongNumberACvalues() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0), + makeG("s2",Aref,T,18,0,24), + makeG("s3",Aref,T,22,0,12)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); + + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + } + + @Test (expectedExceptions = {UserException.class}) + private void testWrongNumberMLEACvalues() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0), + makeG("s2",Aref,T,18,0,24), + makeG("s3",Aref,T,22,0,12)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + } + + @Test + private void testMultipleACvalues() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0), + makeG("s2",Aref,T,18,0,24), + makeG("s3",Aref,T,22,0,12)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + } + + @Test + private void testMultipleMLEACvalues() { + VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0), + makeG("s2",Aref,T,18,0,24), + makeG("s3",Aref,T,22,0,12)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + } + private double[] pl2gl(int[] pl) { double[] gl = new double[pl.length]; for ( int idx = 0; idx < gl.length; idx++ ) { diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java index 848261d73..395e24604 100755 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java @@ -42,8 +42,9 @@ public class ConcordanceMetrics { private Map perSampleGenotypeConcordance; private GenotypeConcordanceTable overallGenotypeConcordance; private SiteConcordanceTable overallSiteConcordance; + private boolean printInterestingSites; - public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth) { + public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth, boolean printSitesEnabled) { HashSet overlappingSamples = new HashSet(evaluate.getGenotypeSamples()); overlappingSamples.retainAll(truth.getGenotypeSamples()); perSampleGenotypeConcordance = new HashMap(overlappingSamples.size()); @@ -52,6 +53,7 @@ public class ConcordanceMetrics { } overallGenotypeConcordance = new GenotypeConcordanceTable(); overallSiteConcordance = new SiteConcordanceTable(); + printInterestingSites = printSitesEnabled; } public GenotypeConcordanceTable getOverallGenotypeConcordance() { @@ -114,6 +116,7 @@ public class ConcordanceMetrics { @Requires({"eval != null","truth != null"}) public void update(VariantContext eval, VariantContext truth) { + boolean doPrint = false; overallSiteConcordance.update(eval,truth); Set alleleTruth = new HashSet(8); String truthRef = truth.getReference().getBaseString(); @@ -130,7 +133,12 @@ public class ConcordanceMetrics { throw new UserException(String.format("Concordance Metrics is currently only implemented for DIPLOID genotypes, found eval ploidy: %d, comp ploidy: %d",evalGenotype.getPloidy(),truthGenotype.getPloidy())); } perSampleGenotypeConcordance.get(sample).update(evalGenotype,truthGenotype,alleleTruth,truthRef); - overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth,truthRef); + doPrint = overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth,truthRef); + if(printInterestingSites && doPrint) + System.out.println(eval.getChr() + ":" + eval.getStart() + "\t truth is:" + truthGenotype.getType() + "\t eval is:" + evalGenotype.getType()); + + //Below is code to print out mismatched alternate alleles + //System.out.println(eval.getChr() + ":" + eval.getStart() + "\t truth is:" + truthGenotype.getAlleles() + "\t eval is:" + evalGenotype.getAlleles()); } } @@ -212,13 +220,14 @@ public class ConcordanceMetrics { } @Requires({"eval!=null","truth != null","truthAlleles != null"}) - public void update(Genotype eval, Genotype truth, Set truthAlleles, String truthRef) { + public Boolean update(Genotype eval, Genotype truth, Set truthAlleles, String truthRef) { // this is slow but correct. // NOTE: a reference call in "truth" is a special case, the eval can match *any* of the truth alleles // that is, if the reference base is C, and a sample is C/C in truth, A/C, A/A, T/C, T/T will // all match, so long as A and T are alleles in the truth callset. boolean matchingAlt = true; + int evalGT, truthGT; if ( eval.isCalled() && truth.isCalled() && truth.isHomRef() ) { // by default, no-calls "match" between alleles, so if // one or both sites are no-call or unavailable, the alt alleles match @@ -241,10 +250,17 @@ public class ConcordanceMetrics { } if ( matchingAlt ) { - genotypeCounts[eval.getType().ordinal()][truth.getType().ordinal()]++; + evalGT = eval.getType().ordinal(); + truthGT = truth.getType().ordinal(); + genotypeCounts[evalGT][truthGT]++; + if(evalGT != truthGT) //report variants where genotypes don't match + return true; } else { nMismatchingAlt++; + return false; + //return true; //alternatively, report variants where alt alleles don't match } + return false; } public int[][] getTable() { diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java index 8c8961cb5..08c938583 100755 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java @@ -25,10 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -213,6 +210,16 @@ public class GenotypeConcordance extends RodWalker> evalCompList, ConcordanceMetrics metrics) { - for ( Pair evalComp : evalCompList) + for ( Pair evalComp : evalCompList){ metrics.update(evalComp.getFirst(),evalComp.getSecond()); + + } return metrics; }