From 9f3cbb2ef10fd1449d53906cbe7e458fdc57dca5 Mon Sep 17 00:00:00 2001
From: Laura Gauthier
+ * Currently, priors will only be applied for SNP sites in the input callset (and only those that have a SNP at the + * matching site in the priors VCF unless the --calculateMissingPriors flag is used). + * If the site is not called in the priors, flat priors will be applied. Flat priors are also applied for any non-SNP + * sites in the input callset. + *
+ * ** Inform the genotype assignment of NA12878 using the 1000G Euro panel @@ -180,7 +188,7 @@ public class CalculateGenotypePosteriors extends RodWalker{ */ @Argument(fullName="numRefSamplesIfNoCall",shortName="nrs",doc="The number of homozygous reference to infer were " + "seen at a position where an \"other callset\" contains no site or genotype information",required=false) - public int numRefIfMissing = 1; + public int numRefIfMissing = 0; /** * Rather than looking for the MLEAC field first, and then falling back to AC; first look for the AC field and then @@ -198,11 +206,15 @@ public class CalculateGenotypePosteriors extends RodWalker { "related individuals.",required=false) public boolean ignoreInputSamples = false; + /** + * Calculate priors for missing external variants from sample data -- default behavior is to apply flat priors + */ + @Argument(fullName="calculateMissingPriors",shortName="calcMissing",doc="Use discovered allele frequency in the callset for variants that do no appear in the external callset", required=false) + public boolean calcMissing = false; + @Output(doc="File to which variants should be written") protected VariantContextWriter vcfWriter = null; - private final boolean NO_EM = false; - public void initialize() { // Get list of samples to include in the output final List rodNames = Arrays.asList(variantCollection.variants.getName()); @@ -254,7 +266,7 @@ public class CalculateGenotypePosteriors extends RodWalker { final int missing = supportVariants.size() - otherVCs.size(); for ( VariantContext vc : vcs ) { - vcfWriter.add(PosteriorLikelihoodsUtils.calculatePosteriorGLs(vc, otherVCs, missing * numRefIfMissing, globalPrior, !ignoreInputSamples, NO_EM, defaultToAC)); + vcfWriter.add(PosteriorLikelihoodsUtils.calculatePosteriorGLs(vc, otherVCs, missing * numRefIfMissing, globalPrior, !ignoreInputSamples, defaultToAC, calcMissing)); } return 1; diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtils.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtils.java index d9b0c575e..bfd8edf30 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtils.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtils.java @@ -62,28 +62,33 @@ public class PosteriorLikelihoodsUtils { final int numRefSamplesFromMissingResources, final double globalFrequencyPriorDirichlet, final boolean useInputSamples, - final boolean useEM, - final boolean useAC) { - if ( useEM ) - throw new IllegalArgumentException("EM loop for posterior GLs not yet implemented"); + final boolean useAC, + final boolean calcMissing) { final Map totalAlleleCounts = new HashMap<>(); + boolean nonSNPprior = false; + if (vc1 == null) throw new IllegalArgumentException("VariantContext vc1 is null"); + final boolean nonSNPeval = !vc1.isSNP(); + final double[] alleleCounts = new double[vc1.getNAlleles()]; - //store the allele counts for each allele in the variant priors - for ( final VariantContext resource : resources ) { - addAlleleCounts(totalAlleleCounts,resource,useAC); + if(!nonSNPeval) + { + //store the allele counts for each allele in the variant priors + for ( final VariantContext resource : resources ) { + if( !resource.isSNP()) nonSNPprior = true; + addAlleleCounts(totalAlleleCounts,resource,useAC); + } + + //add the allele counts from the input samples (if applicable) + if ( useInputSamples ) { + addAlleleCounts(totalAlleleCounts,vc1,useAC); + } + + //add zero allele counts for any reference alleles not seen in priors (if applicable) + totalAlleleCounts.put(vc1.getReference(),totalAlleleCounts.get(vc1.getReference())+numRefSamplesFromMissingResources); } - //add the allele counts from the input samples (if applicable) - if ( useInputSamples ) { - addAlleleCounts(totalAlleleCounts,vc1,useAC); - } - - //add zero allele counts for any reference alleles not seen in priors (if applicable) - totalAlleleCounts.put(vc1.getReference(),totalAlleleCounts.get(vc1.getReference())+numRefSamplesFromMissingResources); - // now extract the counts of the alleles present within vc1, and in order - final double[] alleleCounts = new double[vc1.getNAlleles()]; int alleleIndex = 0; for ( final Allele allele : vc1.getAlleles() ) { @@ -91,13 +96,17 @@ public class PosteriorLikelihoodsUtils { totalAlleleCounts.get(allele) : 0 ); } + //parse the likelihoods for each sample's genotype final List likelihoods = new ArrayList<>(vc1.getNSamples()); for ( final Genotype genotype : vc1.getGenotypes() ) { likelihoods.add(genotype.hasLikelihoods() ? genotype.getLikelihoods().getAsVector() : null ); } - final List posteriors = calculatePosteriorGLs(likelihoods,alleleCounts,vc1.getMaxPloidy(2)); + //TODO: for now just use priors that are SNPs because indel priors will bias SNP calls + final boolean useFlatPriors = nonSNPeval || nonSNPprior || (resources.isEmpty() && !calcMissing); + + final List posteriors = calculatePosteriorGLs(likelihoods,alleleCounts,vc1.getMaxPloidy(2), useFlatPriors); final GenotypesContext newContext = GenotypesContext.create(); for ( int genoIdx = 0; genoIdx < vc1.getNSamples(); genoIdx ++ ) { @@ -112,7 +121,7 @@ public class PosteriorLikelihoodsUtils { } final List priors = Utils.listFromPrimitives( - GenotypeLikelihoods.fromLog10Likelihoods(getDirichletPrior(alleleCounts, vc1.getMaxPloidy(2))).getAsPLs()); + GenotypeLikelihoods.fromLog10Likelihoods(getDirichletPrior(alleleCounts, vc1.getMaxPloidy(2),useFlatPriors)).getAsPLs()); final VariantContextBuilder builder = new VariantContextBuilder(vc1).genotypes(newContext).attribute("PG", priors); // add in the AC, AF, and AN attributes @@ -126,16 +135,18 @@ public class PosteriorLikelihoodsUtils { * @param genotypeLikelihoods - the genotype likelihoods for the individual * @param knownAlleleCountsByAllele - the known allele counts in the population. For AC=2 AN=12 site, this is {10,2} * @param ploidy - the ploidy to assume + * @param useFlatPriors - if true, apply flat priors to likelihoods in order to calculate posterior probabilities * @return - the posterior genotype likelihoods */ protected static List calculatePosteriorGLs(final List genotypeLikelihoods, final double[] knownAlleleCountsByAllele, - final int ploidy) { + final int ploidy, + final boolean useFlatPriors) { if ( ploidy != 2 ) { throw new IllegalStateException("Genotype posteriors not yet implemented for ploidy != 2"); } - final double[] genotypePriorByAllele = getDirichletPrior(knownAlleleCountsByAllele,ploidy); + final double[] genotypePriorByAllele = getDirichletPrior(knownAlleleCountsByAllele,ploidy, useFlatPriors); final List posteriors = new ArrayList<>(genotypeLikelihoods.size()); for ( final double[] likelihoods : genotypeLikelihoods ) { double[] posteriorProbabilities = null; @@ -164,8 +175,9 @@ public class PosteriorLikelihoodsUtils { // convenience function for a single genotypelikelihoods array. Just wraps. protected static double[] calculatePosteriorGLs(final double[] genotypeLikelihoods, final double[] knownAlleleCountsByAllele, - final int ploidy) { - return calculatePosteriorGLs(Arrays.asList(genotypeLikelihoods),knownAlleleCountsByAllele,ploidy).get(0); + final int ploidy, + final boolean useFlatPriors) { + return calculatePosteriorGLs(Arrays.asList(genotypeLikelihoods),knownAlleleCountsByAllele,ploidy, useFlatPriors).get(0); } @@ -180,7 +192,7 @@ public class PosteriorLikelihoodsUtils { * @param ploidy - the number of chromosomes in the sample. For now restricted to 2. * @return - the Dirichlet-Multinomial distribution over genotype states */ - protected static double[] getDirichletPrior(final double[] knownCountsByAllele, final int ploidy) { + protected static double[] getDirichletPrior(final double[] knownCountsByAllele, final int ploidy, final boolean useFlatPrior) { if ( ploidy != 2 ) { throw new IllegalStateException("Genotype priors not yet implemented for ploidy != 2"); } @@ -192,10 +204,14 @@ public class PosteriorLikelihoodsUtils { int priorIndex = 0; for ( int allele2 = 0; allele2 < knownCountsByAllele.length; allele2++ ) { for ( int allele1 = 0; allele1 <= allele2; allele1++) { - final int[] counts = new int[knownCountsByAllele.length]; - counts[allele1] += 1; - counts[allele2] += 1; - priors[priorIndex++] = MathUtils.dirichletMultinomial(knownCountsByAllele,sumOfKnownCounts,counts,ploidy); + if (useFlatPrior) + priors[priorIndex++] = 1.0; + else { + final int[] counts = new int[knownCountsByAllele.length]; + counts[allele1] += 1; + counts[allele2] += 1; + priors[priorIndex++] = MathUtils.dirichletMultinomial(knownCountsByAllele,sumOfKnownCounts,counts,ploidy); + } } } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CalculateGenotypePosteriorsIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CalculateGenotypePosteriorsIntegrationTest.java index 14341e401..d7cd4e88f 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CalculateGenotypePosteriorsIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CalculateGenotypePosteriorsIntegrationTest.java @@ -55,6 +55,19 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testUsingDiscoveredAF() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T CalculateGenotypePosteriors --no_cmdline_in_header -calcMissing" + + " -o %s" + + " -R " + b37KGReference + + " -L 20:10,000,000-10,100,000" + + " -V " + validationDataLocation + "1000G.phase3.broad.withGenotypes.chr20.1MB.vcf", + 1, + Arrays.asList("80d0eedddd215df8ab29bde908c73ca4")); + executeTest("testUsingDiscoveredAF", spec); + } + + @Test(enabled = true) + public void testMissingPriors() { WalkerTestSpec spec = new WalkerTestSpec( "-T CalculateGenotypePosteriors --no_cmdline_in_header" + " -o %s" + @@ -62,8 +75,23 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest { " -L 20:10,000,000-10,100,000" + " -V " + validationDataLocation + "1000G.phase3.broad.withGenotypes.chr20.1MB.vcf", 1, - Arrays.asList("e1adedc7e1d63e384187b24b7ded4410")); - executeTest("testUsingDiscoveredAF", spec); + Arrays.asList("f7653ef21b859b90d54a71ef4245ec85")); + executeTest("testMissingPriors", spec); } + @Test(enabled = true) + public void testInputINDELs() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T CalculateGenotypePosteriors --no_cmdline_in_header" + + " -o %s" + + " -R " + b37KGReference + + " -L 20:10,000,000-10,100,000" + + " -V " + validationDataLocation + "NA12878.Jan2013.haplotypeCaller.subset.indels.vcf" + + " -VV " + validationDataLocation + "1000G.phase3.broad.withGenotypes.chr20.1MB.vcf", + 1, + Arrays.asList("6dd7dcf94bfe99ddcbd477100592db89")); + executeTest("testMissingPriors", spec); + } + + } \ No newline at end of file diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtilsUnitTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtilsUnitTest.java index 87f664905..33d57d801 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtilsUnitTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/PosteriorLikelihoodsUtilsUnitTest.java @@ -137,7 +137,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest { makeG("s2",T,T,60,40,0), makeG("s3",Aref,Aref,0,30,90)); test1 = new VariantContextBuilder(test1).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,3).make(); - VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test1, new ArrayList (), 0, 0.001, true, false, false); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test1, new ArrayList (), 0, 0.001, true, false, true); Genotype test1exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.20686, -0.03073215, -1.20686}); Assert.assertTrue(test1exp1.hasPL()); Genotype test1exp2 = makeGwithPLs("s2",T,T,new double[]{-6.000066, -3.823938, -6.557894e-05}); @@ -155,7 +155,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest { makeG("s3",Aref,Aref,0,5,8,15,20,40), makeG("s4",C,T,80,40,12,20,0,10)); test2 = new VariantContextBuilder(test2).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,new ArrayList (Arrays.asList(2,2))).make(); - VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test2,new ArrayList (),5,0.001,true,false,false); + VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test2,new ArrayList (),5,0.001,true,false, true); Genotype test2exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.647372, -1.045139, -6.823193, -0.04513873, -2.198182, -9.823193}); Genotype test2exp2 = makeGwithPLs("s2",Aref,C,new double[]{-3.609957, -0.007723248, -1.785778, -3.007723, -4.660767, -8.785778}); Genotype test2exp3 = makeGwithPLs("s3",Aref,Aref,new double[] {-0.06094877, -0.9587151, -2.03677,-1.958715, -3.111759, -5.23677}); @@ -177,7 +177,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest { supplTest1.add(makeVC("4",Arrays.asList(Aref,T), makeG("s_1",T,T), makeG("s_2",Aref,T))); - VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false, true); // the counts here are ref=30, alt=14 Genotype test1exp1 = makeGwithPLs("t1",T,T,new double[]{-3.370985, -1.415172, -0.01721766}); Genotype test1exp2 = makeGwithPLs("t2",Aref,T,new double[]{-1.763792, -0.007978791, -3.010024}); @@ -188,7 +188,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest { VariantContext testNonOverlapping = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,3,1,0)); List other = Arrays.asList(makeVC("2",Arrays.asList(Aref,C),makeG("s2",C,C,10,2,0))); - VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testNonOverlapping,other,0,0.001,true,false,false); + VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testNonOverlapping,other,0,0.001,true,false,true); Genotype test2exp1 = makeGwithPLs("SGV",T,T,new double[]{-4.078345, -3.276502, -0.0002661066}); Assert.assertEquals(arraysEq(test2exp1.getPL(),_mleparse((List ) test2result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY))), ""); } @@ -198,7 +198,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest { VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,1,0)); List supplTest1 = new ArrayList<>(1); supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make()); - VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true); int[] GP = _mleparse( (List )test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); Assert.assertTrue(GP[2] > GP[1]); @@ -209,7 +209,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest { VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,0,1)); List supplTest1 = new ArrayList<>(1); supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,900).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make()); - VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true); int[] GP = _mleparse( (List )test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); Assert.assertTrue(GP[2] < GP[1]); @@ -220,7 +220,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest { VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,0,1,40)); List supplTest1 = new ArrayList<>(1); supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make()); - VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true); int[] GP = _mleparse( (List )test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); Assert.assertTrue(GP[0] > GP[1]); @@ -231,7 +231,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest { VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,1,0,40)); List supplTest1 = new ArrayList<>(1); supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,100).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make()); - VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true); int[] GP = _mleparse( (List )test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); Assert.assertTrue(GP[0] < GP[1]); @@ -244,7 +244,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest { makeG("s3",Aref,T,22,0,12)); List supplTest1 = new ArrayList<>(1); supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,11).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); - VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true); } @Test (expectedExceptions = {UserException.class}) @@ -255,7 +255,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest { List supplTest1 = new ArrayList<>(1); supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); - VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true); } @Test (expectedExceptions = {UserException.class}) @@ -265,7 +265,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest { makeG("s3",Aref,T,22,0,12)); List supplTest1 = new ArrayList<>(1); supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); - VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true); } @Test @@ -275,7 +275,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest { makeG("s3",Aref,T,22,0,12)); List supplTest1 = new ArrayList<>(1); supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); - VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true); } @Test @@ -285,7 +285,38 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest { makeG("s3",Aref,T,22,0,12)); List supplTest1 = new ArrayList<>(1); supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); - VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true); + } + + @Test + private void testInputIndel() { + VariantContext inputIndel = makeVC("1", Arrays.asList(Aref, ATC), makeG("s1",ATC,ATC,40,20,0), + makeG("s2",Aref,ATC,18,0,24), + makeG("s3",Aref,ATC,22,0,12)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,true); + + System.out.println(test1result); + int[] GPs = _mleparse( (List )test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); + int[] PLs = test1result.getGenotype(0).getPL(); + Assert.assertEquals(PLs,GPs); + } + + @Test + private void testPriorIndel() { + VariantContext inputIndel = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0), + makeG("s2",Aref,T,18,0,24), + makeG("s3",Aref,T,22,0,12)); + List supplTest1 = new ArrayList<>(1); + supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,ATC,ATCATC))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); + VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,true); + + + System.out.println(test1result); + int[] GPs = _mleparse( (List )test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); + int[] PLs = test1result.getGenotype(0).getPL(); + Assert.assertEquals(PLs,GPs); } private double[] pl2gl(int[] pl) { @@ -411,14 +442,14 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest { knownCounts[1] = numAlt-altCount; int expected_index = 0; for ( int gl_index = 0; gl_index < likelihood_PLs.length; gl_index++ ) { - double[] post = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(likelihood_PLs[gl_index]), knownCounts, 2); + double[] post = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(likelihood_PLs[gl_index]), knownCounts, 2,false); for ( int i = 0; i < post.length; i++ ) { double expected = expectations[testIndex][expected_index++]; double observed = Math.pow(10.0,post[i]); double err = Math.abs( (expected-observed)/expected ); Assert.assertTrue(err < 1e-4, String.format("Counts: %s | Expected: %e | Observed: %e | pre %s | prior %s | post %s", Arrays.toString(knownCounts), expected,observed, Arrays.toString(pl2gl(likelihood_PLs[gl_index])), - Arrays.toString(PosteriorLikelihoodsUtils.getDirichletPrior(knownCounts,2)),Arrays.toString(post))); + Arrays.toString(PosteriorLikelihoodsUtils.getDirichletPrior(knownCounts,2,false)),Arrays.toString(post))); } } testIndex++; @@ -466,17 +497,17 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest { double expected_five[] = new double[] { -9.170334, -5.175724, -6.767055, -0.8250021, -5.126027, -0.07628661, -3.276762, -3.977787, -2.227065, -4.57769, -5.494041, -2.995066, -7.444344, -7.096104, -2.414187}; - double[] post1 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_one),counts_one,2); - double[] post2 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_two),counts_two,2); - double[] post3 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_three),counts_three,2); - double[] post4 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_four),counts_four,2); - double[] post5 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_five),counts_five,2); + double[] post1 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_one),counts_one,2,false); + double[] post2 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_two),counts_two,2,false); + double[] post3 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_three),counts_three,2,false); + double[] post4 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_four),counts_four,2,false); + double[] post5 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_five),counts_five,2,false); double[] expecPrior5 = new double[] {-4.2878195, -4.2932090, -4.8845400, -1.9424874, -2.2435120, -0.1937719, -3.5942477, -3.8952723, -1.5445506, -3.4951749, -2.6115263, -2.9125508, -0.5618292, -2.2135895, -1.5316722}; - Assert.assertTrue(arraysApproxEqual(expecPrior5, PosteriorLikelihoodsUtils.getDirichletPrior(counts_five,2),1e-5),errMsgArray(expecPrior5,PosteriorLikelihoodsUtils.getDirichletPrior(counts_five,2))); + Assert.assertTrue(arraysApproxEqual(expecPrior5, PosteriorLikelihoodsUtils.getDirichletPrior(counts_five,2,false),1e-5),errMsgArray(expecPrior5,PosteriorLikelihoodsUtils.getDirichletPrior(counts_five,2,false))); Assert.assertTrue(arraysApproxEqual(expected_one,post1,1e-6),errMsgArray(expected_one,post1)); Assert.assertTrue(arraysApproxEqual(expected_two,post2,1e-5),errMsgArray(expected_two,post2));