Merge pull request #606 from broadinstitute/ldg_CalibrateLikelihoodsForCGP

Improvements to CalculateGenotypePosteriors and CalibrateGenotypeLikelih...
This commit is contained in:
ldgauthier 2014-04-24 10:58:40 -04:00
commit 147ae21253
4 changed files with 141 additions and 54 deletions

View File

@ -108,6 +108,14 @@ import java.util.*;
* 3) Per-site genotype priors added to the INFO field ("PG")
* </p>
*
* <h3>Notes</h3>
* <p>
* Currently, priors will only be applied for SNP sites in the input callset (and only those that have a SNP at the
* matching site in the priors VCF unless the --calculateMissingPriors flag is used).
* If the site is not called in the priors, flat priors will be applied. Flat priors are also applied for any non-SNP
* sites in the input callset.
* </p>
*
* <h3>Examples</h3>
* <pre>
* Inform the genotype assignment of NA12878 using the 1000G Euro panel
@ -180,7 +188,7 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
*/
@Argument(fullName="numRefSamplesIfNoCall",shortName="nrs",doc="The number of homozygous reference to infer were " +
"seen at a position where an \"other callset\" contains no site or genotype information",required=false)
public int numRefIfMissing = 1;
public int numRefIfMissing = 0;
/**
* Rather than looking for the MLEAC field first, and then falling back to AC; first look for the AC field and then
@ -198,11 +206,15 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
"related individuals.",required=false)
public boolean ignoreInputSamples = false;
/**
* Calculate priors for missing external variants from sample data -- default behavior is to apply flat priors
*/
@Argument(fullName="calculateMissingPriors",shortName="calcMissing",doc="Use discovered allele frequency in the callset for variants that do no appear in the external callset", required=false)
public boolean calcMissing = false;
@Output(doc="File to which variants should be written")
protected VariantContextWriter vcfWriter = null;
private final boolean NO_EM = false;
public void initialize() {
// Get list of samples to include in the output
final List<String> rodNames = Arrays.asList(variantCollection.variants.getName());
@ -254,7 +266,7 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
final int missing = supportVariants.size() - otherVCs.size();
for ( VariantContext vc : vcs ) {
vcfWriter.add(PosteriorLikelihoodsUtils.calculatePosteriorGLs(vc, otherVCs, missing * numRefIfMissing, globalPrior, !ignoreInputSamples, NO_EM, defaultToAC));
vcfWriter.add(PosteriorLikelihoodsUtils.calculatePosteriorGLs(vc, otherVCs, missing * numRefIfMissing, globalPrior, !ignoreInputSamples, defaultToAC, calcMissing));
}
return 1;

View File

@ -62,28 +62,33 @@ public class PosteriorLikelihoodsUtils {
final int numRefSamplesFromMissingResources,
final double globalFrequencyPriorDirichlet,
final boolean useInputSamples,
final boolean useEM,
final boolean useAC) {
if ( useEM )
throw new IllegalArgumentException("EM loop for posterior GLs not yet implemented");
final boolean useAC,
final boolean calcMissing) {
final Map<Allele,Integer> totalAlleleCounts = new HashMap<>();
boolean nonSNPprior = false;
if (vc1 == null) throw new IllegalArgumentException("VariantContext vc1 is null");
final boolean nonSNPeval = !vc1.isSNP();
final double[] alleleCounts = new double[vc1.getNAlleles()];
//store the allele counts for each allele in the variant priors
for ( final VariantContext resource : resources ) {
addAlleleCounts(totalAlleleCounts,resource,useAC);
if(!nonSNPeval)
{
//store the allele counts for each allele in the variant priors
for ( final VariantContext resource : resources ) {
if( !resource.isSNP()) nonSNPprior = true;
addAlleleCounts(totalAlleleCounts,resource,useAC);
}
//add the allele counts from the input samples (if applicable)
if ( useInputSamples ) {
addAlleleCounts(totalAlleleCounts,vc1,useAC);
}
//add zero allele counts for any reference alleles not seen in priors (if applicable)
totalAlleleCounts.put(vc1.getReference(),totalAlleleCounts.get(vc1.getReference())+numRefSamplesFromMissingResources);
}
//add the allele counts from the input samples (if applicable)
if ( useInputSamples ) {
addAlleleCounts(totalAlleleCounts,vc1,useAC);
}
//add zero allele counts for any reference alleles not seen in priors (if applicable)
totalAlleleCounts.put(vc1.getReference(),totalAlleleCounts.get(vc1.getReference())+numRefSamplesFromMissingResources);
// now extract the counts of the alleles present within vc1, and in order
final double[] alleleCounts = new double[vc1.getNAlleles()];
int alleleIndex = 0;
for ( final Allele allele : vc1.getAlleles() ) {
@ -91,13 +96,17 @@ public class PosteriorLikelihoodsUtils {
totalAlleleCounts.get(allele) : 0 );
}
//parse the likelihoods for each sample's genotype
final List<double[]> likelihoods = new ArrayList<>(vc1.getNSamples());
for ( final Genotype genotype : vc1.getGenotypes() ) {
likelihoods.add(genotype.hasLikelihoods() ? genotype.getLikelihoods().getAsVector() : null );
}
final List<double[]> posteriors = calculatePosteriorGLs(likelihoods,alleleCounts,vc1.getMaxPloidy(2));
//TODO: for now just use priors that are SNPs because indel priors will bias SNP calls
final boolean useFlatPriors = nonSNPeval || nonSNPprior || (resources.isEmpty() && !calcMissing);
final List<double[]> posteriors = calculatePosteriorGLs(likelihoods,alleleCounts,vc1.getMaxPloidy(2), useFlatPriors);
final GenotypesContext newContext = GenotypesContext.create();
for ( int genoIdx = 0; genoIdx < vc1.getNSamples(); genoIdx ++ ) {
@ -112,7 +121,7 @@ public class PosteriorLikelihoodsUtils {
}
final List<Integer> priors = Utils.listFromPrimitives(
GenotypeLikelihoods.fromLog10Likelihoods(getDirichletPrior(alleleCounts, vc1.getMaxPloidy(2))).getAsPLs());
GenotypeLikelihoods.fromLog10Likelihoods(getDirichletPrior(alleleCounts, vc1.getMaxPloidy(2),useFlatPriors)).getAsPLs());
final VariantContextBuilder builder = new VariantContextBuilder(vc1).genotypes(newContext).attribute("PG", priors);
// add in the AC, AF, and AN attributes
@ -126,16 +135,18 @@ public class PosteriorLikelihoodsUtils {
* @param genotypeLikelihoods - the genotype likelihoods for the individual
* @param knownAlleleCountsByAllele - the known allele counts in the population. For AC=2 AN=12 site, this is {10,2}
* @param ploidy - the ploidy to assume
* @param useFlatPriors - if true, apply flat priors to likelihoods in order to calculate posterior probabilities
* @return - the posterior genotype likelihoods
*/
protected static List<double[]> calculatePosteriorGLs(final List<double[]> genotypeLikelihoods,
final double[] knownAlleleCountsByAllele,
final int ploidy) {
final int ploidy,
final boolean useFlatPriors) {
if ( ploidy != 2 ) {
throw new IllegalStateException("Genotype posteriors not yet implemented for ploidy != 2");
}
final double[] genotypePriorByAllele = getDirichletPrior(knownAlleleCountsByAllele,ploidy);
final double[] genotypePriorByAllele = getDirichletPrior(knownAlleleCountsByAllele,ploidy, useFlatPriors);
final List<double[]> posteriors = new ArrayList<>(genotypeLikelihoods.size());
for ( final double[] likelihoods : genotypeLikelihoods ) {
double[] posteriorProbabilities = null;
@ -164,8 +175,9 @@ public class PosteriorLikelihoodsUtils {
// convenience function for a single genotypelikelihoods array. Just wraps.
protected static double[] calculatePosteriorGLs(final double[] genotypeLikelihoods,
final double[] knownAlleleCountsByAllele,
final int ploidy) {
return calculatePosteriorGLs(Arrays.asList(genotypeLikelihoods),knownAlleleCountsByAllele,ploidy).get(0);
final int ploidy,
final boolean useFlatPriors) {
return calculatePosteriorGLs(Arrays.asList(genotypeLikelihoods),knownAlleleCountsByAllele,ploidy, useFlatPriors).get(0);
}
@ -180,7 +192,7 @@ public class PosteriorLikelihoodsUtils {
* @param ploidy - the number of chromosomes in the sample. For now restricted to 2.
* @return - the Dirichlet-Multinomial distribution over genotype states
*/
protected static double[] getDirichletPrior(final double[] knownCountsByAllele, final int ploidy) {
protected static double[] getDirichletPrior(final double[] knownCountsByAllele, final int ploidy, final boolean useFlatPrior) {
if ( ploidy != 2 ) {
throw new IllegalStateException("Genotype priors not yet implemented for ploidy != 2");
}
@ -192,10 +204,14 @@ public class PosteriorLikelihoodsUtils {
int priorIndex = 0;
for ( int allele2 = 0; allele2 < knownCountsByAllele.length; allele2++ ) {
for ( int allele1 = 0; allele1 <= allele2; allele1++) {
final int[] counts = new int[knownCountsByAllele.length];
counts[allele1] += 1;
counts[allele2] += 1;
priors[priorIndex++] = MathUtils.dirichletMultinomial(knownCountsByAllele,sumOfKnownCounts,counts,ploidy);
if (useFlatPrior)
priors[priorIndex++] = 1.0;
else {
final int[] counts = new int[knownCountsByAllele.length];
counts[allele1] += 1;
counts[allele2] += 1;
priors[priorIndex++] = MathUtils.dirichletMultinomial(knownCountsByAllele,sumOfKnownCounts,counts,ploidy);
}
}
}

View File

@ -55,6 +55,19 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testUsingDiscoveredAF() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T CalculateGenotypePosteriors --no_cmdline_in_header -calcMissing" +
" -o %s" +
" -R " + b37KGReference +
" -L 20:10,000,000-10,100,000" +
" -V " + validationDataLocation + "1000G.phase3.broad.withGenotypes.chr20.1MB.vcf",
1,
Arrays.asList("80d0eedddd215df8ab29bde908c73ca4"));
executeTest("testUsingDiscoveredAF", spec);
}
@Test(enabled = true)
public void testMissingPriors() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T CalculateGenotypePosteriors --no_cmdline_in_header" +
" -o %s" +
@ -62,8 +75,23 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest {
" -L 20:10,000,000-10,100,000" +
" -V " + validationDataLocation + "1000G.phase3.broad.withGenotypes.chr20.1MB.vcf",
1,
Arrays.asList("e1adedc7e1d63e384187b24b7ded4410"));
executeTest("testUsingDiscoveredAF", spec);
Arrays.asList("f7653ef21b859b90d54a71ef4245ec85"));
executeTest("testMissingPriors", spec);
}
@Test(enabled = true)
public void testInputINDELs() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T CalculateGenotypePosteriors --no_cmdline_in_header" +
" -o %s" +
" -R " + b37KGReference +
" -L 20:10,000,000-10,100,000" +
" -V " + validationDataLocation + "NA12878.Jan2013.haplotypeCaller.subset.indels.vcf" +
" -VV " + validationDataLocation + "1000G.phase3.broad.withGenotypes.chr20.1MB.vcf",
1,
Arrays.asList("6dd7dcf94bfe99ddcbd477100592db89"));
executeTest("testMissingPriors", spec);
}
}

View File

@ -137,7 +137,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s2",T,T,60,40,0),
makeG("s3",Aref,Aref,0,30,90));
test1 = new VariantContextBuilder(test1).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,3).make();
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test1, new ArrayList<VariantContext>(), 0, 0.001, true, false, false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test1, new ArrayList<VariantContext>(), 0, 0.001, true, false, true);
Genotype test1exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.20686, -0.03073215, -1.20686});
Assert.assertTrue(test1exp1.hasPL());
Genotype test1exp2 = makeGwithPLs("s2",T,T,new double[]{-6.000066, -3.823938, -6.557894e-05});
@ -155,7 +155,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,Aref,0,5,8,15,20,40),
makeG("s4",C,T,80,40,12,20,0,10));
test2 = new VariantContextBuilder(test2).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,new ArrayList<Integer>(Arrays.asList(2,2))).make();
VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test2,new ArrayList<VariantContext>(),5,0.001,true,false,false);
VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test2,new ArrayList<VariantContext>(),5,0.001,true,false, true);
Genotype test2exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.647372, -1.045139, -6.823193, -0.04513873, -2.198182, -9.823193});
Genotype test2exp2 = makeGwithPLs("s2",Aref,C,new double[]{-3.609957, -0.007723248, -1.785778, -3.007723, -4.660767, -8.785778});
Genotype test2exp3 = makeGwithPLs("s3",Aref,Aref,new double[] {-0.06094877, -0.9587151, -2.03677,-1.958715, -3.111759, -5.23677});
@ -177,7 +177,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
supplTest1.add(makeVC("4",Arrays.asList(Aref,T),
makeG("s_1",T,T),
makeG("s_2",Aref,T)));
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false, true);
// the counts here are ref=30, alt=14
Genotype test1exp1 = makeGwithPLs("t1",T,T,new double[]{-3.370985, -1.415172, -0.01721766});
Genotype test1exp2 = makeGwithPLs("t2",Aref,T,new double[]{-1.763792, -0.007978791, -3.010024});
@ -188,7 +188,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
VariantContext testNonOverlapping = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,3,1,0));
List<VariantContext> other = Arrays.asList(makeVC("2",Arrays.asList(Aref,C),makeG("s2",C,C,10,2,0)));
VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testNonOverlapping,other,0,0.001,true,false,false);
VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testNonOverlapping,other,0,0.001,true,false,true);
Genotype test2exp1 = makeGwithPLs("SGV",T,T,new double[]{-4.078345, -3.276502, -0.0002661066});
Assert.assertEquals(arraysEq(test2exp1.getPL(),_mleparse((List<Integer>) test2result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY))), "");
}
@ -198,7 +198,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,1,0));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
Assert.assertTrue(GP[2] > GP[1]);
@ -209,7 +209,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,0,1));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,900).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
Assert.assertTrue(GP[2] < GP[1]);
@ -220,7 +220,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,0,1,40));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
Assert.assertTrue(GP[0] > GP[1]);
@ -231,7 +231,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,1,0,40));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,100).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
Assert.assertTrue(GP[0] < GP[1]);
@ -244,7 +244,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,T,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,11).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
}
@Test (expectedExceptions = {UserException.class})
@ -255,7 +255,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
}
@Test (expectedExceptions = {UserException.class})
@ -265,7 +265,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,T,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
}
@Test
@ -275,7 +275,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,T,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
}
@Test
@ -285,7 +285,38 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,T,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
}
@Test
private void testInputIndel() {
VariantContext inputIndel = makeVC("1", Arrays.asList(Aref, ATC), makeG("s1",ATC,ATC,40,20,0),
makeG("s2",Aref,ATC,18,0,24),
makeG("s3",Aref,ATC,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,true);
System.out.println(test1result);
int[] GPs = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
int[] PLs = test1result.getGenotype(0).getPL();
Assert.assertEquals(PLs,GPs);
}
@Test
private void testPriorIndel() {
VariantContext inputIndel = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
makeG("s2",Aref,T,18,0,24),
makeG("s3",Aref,T,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,ATC,ATCATC))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,true);
System.out.println(test1result);
int[] GPs = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
int[] PLs = test1result.getGenotype(0).getPL();
Assert.assertEquals(PLs,GPs);
}
private double[] pl2gl(int[] pl) {
@ -411,14 +442,14 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
knownCounts[1] = numAlt-altCount;
int expected_index = 0;
for ( int gl_index = 0; gl_index < likelihood_PLs.length; gl_index++ ) {
double[] post = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(likelihood_PLs[gl_index]), knownCounts, 2);
double[] post = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(likelihood_PLs[gl_index]), knownCounts, 2,false);
for ( int i = 0; i < post.length; i++ ) {
double expected = expectations[testIndex][expected_index++];
double observed = Math.pow(10.0,post[i]);
double err = Math.abs( (expected-observed)/expected );
Assert.assertTrue(err < 1e-4, String.format("Counts: %s | Expected: %e | Observed: %e | pre %s | prior %s | post %s",
Arrays.toString(knownCounts), expected,observed, Arrays.toString(pl2gl(likelihood_PLs[gl_index])),
Arrays.toString(PosteriorLikelihoodsUtils.getDirichletPrior(knownCounts,2)),Arrays.toString(post)));
Arrays.toString(PosteriorLikelihoodsUtils.getDirichletPrior(knownCounts,2,false)),Arrays.toString(post)));
}
}
testIndex++;
@ -466,17 +497,17 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
double expected_five[] = new double[] { -9.170334, -5.175724, -6.767055, -0.8250021, -5.126027, -0.07628661, -3.276762,
-3.977787, -2.227065, -4.57769, -5.494041, -2.995066, -7.444344, -7.096104, -2.414187};
double[] post1 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_one),counts_one,2);
double[] post2 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_two),counts_two,2);
double[] post3 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_three),counts_three,2);
double[] post4 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_four),counts_four,2);
double[] post5 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_five),counts_five,2);
double[] post1 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_one),counts_one,2,false);
double[] post2 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_two),counts_two,2,false);
double[] post3 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_three),counts_three,2,false);
double[] post4 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_four),counts_four,2,false);
double[] post5 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_five),counts_five,2,false);
double[] expecPrior5 = new double[] {-4.2878195, -4.2932090, -4.8845400, -1.9424874, -2.2435120, -0.1937719, -3.5942477,
-3.8952723, -1.5445506, -3.4951749, -2.6115263, -2.9125508, -0.5618292, -2.2135895,
-1.5316722};
Assert.assertTrue(arraysApproxEqual(expecPrior5, PosteriorLikelihoodsUtils.getDirichletPrior(counts_five,2),1e-5),errMsgArray(expecPrior5,PosteriorLikelihoodsUtils.getDirichletPrior(counts_five,2)));
Assert.assertTrue(arraysApproxEqual(expecPrior5, PosteriorLikelihoodsUtils.getDirichletPrior(counts_five,2,false),1e-5),errMsgArray(expecPrior5,PosteriorLikelihoodsUtils.getDirichletPrior(counts_five,2,false)));
Assert.assertTrue(arraysApproxEqual(expected_one,post1,1e-6),errMsgArray(expected_one,post1));
Assert.assertTrue(arraysApproxEqual(expected_two,post2,1e-5),errMsgArray(expected_two,post2));