Improvements to CalculateGenotypePosteriors and CalibrateGenotypeLikelihoods

CalculateGenotypePosteriors now only computes posterior probs for SNP sites with SNP priors
(other sites have flat priors applied)

CalibrateGenotypeLikelihoods had originally applied HOM_REF/HET/HOM_VAR frequencies in callset as priors before empirical quality analysis. Now has option (-noPriors) to not apply/apply flat priors. Also takes in new external probabilities files, such as those generated by CGP, from which the genotype posterior probability qualities will be read.

Integration test was changed to account for new SNP-only behavior and default behavior to not use missing priors.

(Also, new numRefIfMissing is 0, which should only matter in cases using few samples when you probably don't want to be doing that anyway!)
This commit is contained in:
Laura Gauthier 2014-03-17 08:51:02 -04:00
parent 92a3aa35d5
commit 9f3cbb2ef1
4 changed files with 141 additions and 54 deletions

View File

@ -108,6 +108,14 @@ import java.util.*;
* 3) Per-site genotype priors added to the INFO field ("PG")
* </p>
*
* <h3>Notes</h3>
* <p>
* Currently, priors will only be applied for SNP sites in the input callset (and only those that have a SNP at the
* matching site in the priors VCF unless the --calculateMissingPriors flag is used).
* If the site is not called in the priors, flat priors will be applied. Flat priors are also applied for any non-SNP
* sites in the input callset.
* </p>
*
* <h3>Examples</h3>
* <pre>
* Inform the genotype assignment of NA12878 using the 1000G Euro panel
@ -180,7 +188,7 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
*/
@Argument(fullName="numRefSamplesIfNoCall",shortName="nrs",doc="The number of homozygous reference to infer were " +
"seen at a position where an \"other callset\" contains no site or genotype information",required=false)
public int numRefIfMissing = 1;
public int numRefIfMissing = 0;
/**
* Rather than looking for the MLEAC field first, and then falling back to AC; first look for the AC field and then
@ -198,11 +206,15 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
"related individuals.",required=false)
public boolean ignoreInputSamples = false;
/**
* Calculate priors for missing external variants from sample data -- default behavior is to apply flat priors
*/
@Argument(fullName="calculateMissingPriors",shortName="calcMissing",doc="Use discovered allele frequency in the callset for variants that do no appear in the external callset", required=false)
public boolean calcMissing = false;
@Output(doc="File to which variants should be written")
protected VariantContextWriter vcfWriter = null;
private final boolean NO_EM = false;
public void initialize() {
// Get list of samples to include in the output
final List<String> rodNames = Arrays.asList(variantCollection.variants.getName());
@ -254,7 +266,7 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
final int missing = supportVariants.size() - otherVCs.size();
for ( VariantContext vc : vcs ) {
vcfWriter.add(PosteriorLikelihoodsUtils.calculatePosteriorGLs(vc, otherVCs, missing * numRefIfMissing, globalPrior, !ignoreInputSamples, NO_EM, defaultToAC));
vcfWriter.add(PosteriorLikelihoodsUtils.calculatePosteriorGLs(vc, otherVCs, missing * numRefIfMissing, globalPrior, !ignoreInputSamples, defaultToAC, calcMissing));
}
return 1;

View File

@ -62,28 +62,33 @@ public class PosteriorLikelihoodsUtils {
final int numRefSamplesFromMissingResources,
final double globalFrequencyPriorDirichlet,
final boolean useInputSamples,
final boolean useEM,
final boolean useAC) {
if ( useEM )
throw new IllegalArgumentException("EM loop for posterior GLs not yet implemented");
final boolean useAC,
final boolean calcMissing) {
final Map<Allele,Integer> totalAlleleCounts = new HashMap<>();
boolean nonSNPprior = false;
if (vc1 == null) throw new IllegalArgumentException("VariantContext vc1 is null");
final boolean nonSNPeval = !vc1.isSNP();
final double[] alleleCounts = new double[vc1.getNAlleles()];
//store the allele counts for each allele in the variant priors
for ( final VariantContext resource : resources ) {
addAlleleCounts(totalAlleleCounts,resource,useAC);
if(!nonSNPeval)
{
//store the allele counts for each allele in the variant priors
for ( final VariantContext resource : resources ) {
if( !resource.isSNP()) nonSNPprior = true;
addAlleleCounts(totalAlleleCounts,resource,useAC);
}
//add the allele counts from the input samples (if applicable)
if ( useInputSamples ) {
addAlleleCounts(totalAlleleCounts,vc1,useAC);
}
//add zero allele counts for any reference alleles not seen in priors (if applicable)
totalAlleleCounts.put(vc1.getReference(),totalAlleleCounts.get(vc1.getReference())+numRefSamplesFromMissingResources);
}
//add the allele counts from the input samples (if applicable)
if ( useInputSamples ) {
addAlleleCounts(totalAlleleCounts,vc1,useAC);
}
//add zero allele counts for any reference alleles not seen in priors (if applicable)
totalAlleleCounts.put(vc1.getReference(),totalAlleleCounts.get(vc1.getReference())+numRefSamplesFromMissingResources);
// now extract the counts of the alleles present within vc1, and in order
final double[] alleleCounts = new double[vc1.getNAlleles()];
int alleleIndex = 0;
for ( final Allele allele : vc1.getAlleles() ) {
@ -91,13 +96,17 @@ public class PosteriorLikelihoodsUtils {
totalAlleleCounts.get(allele) : 0 );
}
//parse the likelihoods for each sample's genotype
final List<double[]> likelihoods = new ArrayList<>(vc1.getNSamples());
for ( final Genotype genotype : vc1.getGenotypes() ) {
likelihoods.add(genotype.hasLikelihoods() ? genotype.getLikelihoods().getAsVector() : null );
}
final List<double[]> posteriors = calculatePosteriorGLs(likelihoods,alleleCounts,vc1.getMaxPloidy(2));
//TODO: for now just use priors that are SNPs because indel priors will bias SNP calls
final boolean useFlatPriors = nonSNPeval || nonSNPprior || (resources.isEmpty() && !calcMissing);
final List<double[]> posteriors = calculatePosteriorGLs(likelihoods,alleleCounts,vc1.getMaxPloidy(2), useFlatPriors);
final GenotypesContext newContext = GenotypesContext.create();
for ( int genoIdx = 0; genoIdx < vc1.getNSamples(); genoIdx ++ ) {
@ -112,7 +121,7 @@ public class PosteriorLikelihoodsUtils {
}
final List<Integer> priors = Utils.listFromPrimitives(
GenotypeLikelihoods.fromLog10Likelihoods(getDirichletPrior(alleleCounts, vc1.getMaxPloidy(2))).getAsPLs());
GenotypeLikelihoods.fromLog10Likelihoods(getDirichletPrior(alleleCounts, vc1.getMaxPloidy(2),useFlatPriors)).getAsPLs());
final VariantContextBuilder builder = new VariantContextBuilder(vc1).genotypes(newContext).attribute("PG", priors);
// add in the AC, AF, and AN attributes
@ -126,16 +135,18 @@ public class PosteriorLikelihoodsUtils {
* @param genotypeLikelihoods - the genotype likelihoods for the individual
* @param knownAlleleCountsByAllele - the known allele counts in the population. For AC=2 AN=12 site, this is {10,2}
* @param ploidy - the ploidy to assume
* @param useFlatPriors - if true, apply flat priors to likelihoods in order to calculate posterior probabilities
* @return - the posterior genotype likelihoods
*/
protected static List<double[]> calculatePosteriorGLs(final List<double[]> genotypeLikelihoods,
final double[] knownAlleleCountsByAllele,
final int ploidy) {
final int ploidy,
final boolean useFlatPriors) {
if ( ploidy != 2 ) {
throw new IllegalStateException("Genotype posteriors not yet implemented for ploidy != 2");
}
final double[] genotypePriorByAllele = getDirichletPrior(knownAlleleCountsByAllele,ploidy);
final double[] genotypePriorByAllele = getDirichletPrior(knownAlleleCountsByAllele,ploidy, useFlatPriors);
final List<double[]> posteriors = new ArrayList<>(genotypeLikelihoods.size());
for ( final double[] likelihoods : genotypeLikelihoods ) {
double[] posteriorProbabilities = null;
@ -164,8 +175,9 @@ public class PosteriorLikelihoodsUtils {
// convenience function for a single genotypelikelihoods array. Just wraps.
protected static double[] calculatePosteriorGLs(final double[] genotypeLikelihoods,
final double[] knownAlleleCountsByAllele,
final int ploidy) {
return calculatePosteriorGLs(Arrays.asList(genotypeLikelihoods),knownAlleleCountsByAllele,ploidy).get(0);
final int ploidy,
final boolean useFlatPriors) {
return calculatePosteriorGLs(Arrays.asList(genotypeLikelihoods),knownAlleleCountsByAllele,ploidy, useFlatPriors).get(0);
}
@ -180,7 +192,7 @@ public class PosteriorLikelihoodsUtils {
* @param ploidy - the number of chromosomes in the sample. For now restricted to 2.
* @return - the Dirichlet-Multinomial distribution over genotype states
*/
protected static double[] getDirichletPrior(final double[] knownCountsByAllele, final int ploidy) {
protected static double[] getDirichletPrior(final double[] knownCountsByAllele, final int ploidy, final boolean useFlatPrior) {
if ( ploidy != 2 ) {
throw new IllegalStateException("Genotype priors not yet implemented for ploidy != 2");
}
@ -192,10 +204,14 @@ public class PosteriorLikelihoodsUtils {
int priorIndex = 0;
for ( int allele2 = 0; allele2 < knownCountsByAllele.length; allele2++ ) {
for ( int allele1 = 0; allele1 <= allele2; allele1++) {
final int[] counts = new int[knownCountsByAllele.length];
counts[allele1] += 1;
counts[allele2] += 1;
priors[priorIndex++] = MathUtils.dirichletMultinomial(knownCountsByAllele,sumOfKnownCounts,counts,ploidy);
if (useFlatPrior)
priors[priorIndex++] = 1.0;
else {
final int[] counts = new int[knownCountsByAllele.length];
counts[allele1] += 1;
counts[allele2] += 1;
priors[priorIndex++] = MathUtils.dirichletMultinomial(knownCountsByAllele,sumOfKnownCounts,counts,ploidy);
}
}
}

View File

@ -55,6 +55,19 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testUsingDiscoveredAF() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T CalculateGenotypePosteriors --no_cmdline_in_header -calcMissing" +
" -o %s" +
" -R " + b37KGReference +
" -L 20:10,000,000-10,100,000" +
" -V " + validationDataLocation + "1000G.phase3.broad.withGenotypes.chr20.1MB.vcf",
1,
Arrays.asList("80d0eedddd215df8ab29bde908c73ca4"));
executeTest("testUsingDiscoveredAF", spec);
}
@Test(enabled = true)
public void testMissingPriors() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T CalculateGenotypePosteriors --no_cmdline_in_header" +
" -o %s" +
@ -62,8 +75,23 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest {
" -L 20:10,000,000-10,100,000" +
" -V " + validationDataLocation + "1000G.phase3.broad.withGenotypes.chr20.1MB.vcf",
1,
Arrays.asList("e1adedc7e1d63e384187b24b7ded4410"));
executeTest("testUsingDiscoveredAF", spec);
Arrays.asList("f7653ef21b859b90d54a71ef4245ec85"));
executeTest("testMissingPriors", spec);
}
@Test(enabled = true)
public void testInputINDELs() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T CalculateGenotypePosteriors --no_cmdline_in_header" +
" -o %s" +
" -R " + b37KGReference +
" -L 20:10,000,000-10,100,000" +
" -V " + validationDataLocation + "NA12878.Jan2013.haplotypeCaller.subset.indels.vcf" +
" -VV " + validationDataLocation + "1000G.phase3.broad.withGenotypes.chr20.1MB.vcf",
1,
Arrays.asList("6dd7dcf94bfe99ddcbd477100592db89"));
executeTest("testMissingPriors", spec);
}
}

View File

@ -137,7 +137,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s2",T,T,60,40,0),
makeG("s3",Aref,Aref,0,30,90));
test1 = new VariantContextBuilder(test1).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,3).make();
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test1, new ArrayList<VariantContext>(), 0, 0.001, true, false, false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test1, new ArrayList<VariantContext>(), 0, 0.001, true, false, true);
Genotype test1exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.20686, -0.03073215, -1.20686});
Assert.assertTrue(test1exp1.hasPL());
Genotype test1exp2 = makeGwithPLs("s2",T,T,new double[]{-6.000066, -3.823938, -6.557894e-05});
@ -155,7 +155,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,Aref,0,5,8,15,20,40),
makeG("s4",C,T,80,40,12,20,0,10));
test2 = new VariantContextBuilder(test2).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,new ArrayList<Integer>(Arrays.asList(2,2))).make();
VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test2,new ArrayList<VariantContext>(),5,0.001,true,false,false);
VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test2,new ArrayList<VariantContext>(),5,0.001,true,false, true);
Genotype test2exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.647372, -1.045139, -6.823193, -0.04513873, -2.198182, -9.823193});
Genotype test2exp2 = makeGwithPLs("s2",Aref,C,new double[]{-3.609957, -0.007723248, -1.785778, -3.007723, -4.660767, -8.785778});
Genotype test2exp3 = makeGwithPLs("s3",Aref,Aref,new double[] {-0.06094877, -0.9587151, -2.03677,-1.958715, -3.111759, -5.23677});
@ -177,7 +177,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
supplTest1.add(makeVC("4",Arrays.asList(Aref,T),
makeG("s_1",T,T),
makeG("s_2",Aref,T)));
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false, true);
// the counts here are ref=30, alt=14
Genotype test1exp1 = makeGwithPLs("t1",T,T,new double[]{-3.370985, -1.415172, -0.01721766});
Genotype test1exp2 = makeGwithPLs("t2",Aref,T,new double[]{-1.763792, -0.007978791, -3.010024});
@ -188,7 +188,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
VariantContext testNonOverlapping = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,3,1,0));
List<VariantContext> other = Arrays.asList(makeVC("2",Arrays.asList(Aref,C),makeG("s2",C,C,10,2,0)));
VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testNonOverlapping,other,0,0.001,true,false,false);
VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testNonOverlapping,other,0,0.001,true,false,true);
Genotype test2exp1 = makeGwithPLs("SGV",T,T,new double[]{-4.078345, -3.276502, -0.0002661066});
Assert.assertEquals(arraysEq(test2exp1.getPL(),_mleparse((List<Integer>) test2result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY))), "");
}
@ -198,7 +198,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,1,0));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
Assert.assertTrue(GP[2] > GP[1]);
@ -209,7 +209,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,0,1));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,900).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
Assert.assertTrue(GP[2] < GP[1]);
@ -220,7 +220,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,0,1,40));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
Assert.assertTrue(GP[0] > GP[1]);
@ -231,7 +231,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,1,0,40));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,100).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
Assert.assertTrue(GP[0] < GP[1]);
@ -244,7 +244,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,T,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,11).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
}
@Test (expectedExceptions = {UserException.class})
@ -255,7 +255,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
}
@Test (expectedExceptions = {UserException.class})
@ -265,7 +265,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,T,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
}
@Test
@ -275,7 +275,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,T,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
}
@Test
@ -285,7 +285,38 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,T,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
}
@Test
private void testInputIndel() {
VariantContext inputIndel = makeVC("1", Arrays.asList(Aref, ATC), makeG("s1",ATC,ATC,40,20,0),
makeG("s2",Aref,ATC,18,0,24),
makeG("s3",Aref,ATC,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,true);
System.out.println(test1result);
int[] GPs = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
int[] PLs = test1result.getGenotype(0).getPL();
Assert.assertEquals(PLs,GPs);
}
@Test
private void testPriorIndel() {
VariantContext inputIndel = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
makeG("s2",Aref,T,18,0,24),
makeG("s3",Aref,T,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,ATC,ATCATC))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,true);
System.out.println(test1result);
int[] GPs = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
int[] PLs = test1result.getGenotype(0).getPL();
Assert.assertEquals(PLs,GPs);
}
private double[] pl2gl(int[] pl) {
@ -411,14 +442,14 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
knownCounts[1] = numAlt-altCount;
int expected_index = 0;
for ( int gl_index = 0; gl_index < likelihood_PLs.length; gl_index++ ) {
double[] post = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(likelihood_PLs[gl_index]), knownCounts, 2);
double[] post = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(likelihood_PLs[gl_index]), knownCounts, 2,false);
for ( int i = 0; i < post.length; i++ ) {
double expected = expectations[testIndex][expected_index++];
double observed = Math.pow(10.0,post[i]);
double err = Math.abs( (expected-observed)/expected );
Assert.assertTrue(err < 1e-4, String.format("Counts: %s | Expected: %e | Observed: %e | pre %s | prior %s | post %s",
Arrays.toString(knownCounts), expected,observed, Arrays.toString(pl2gl(likelihood_PLs[gl_index])),
Arrays.toString(PosteriorLikelihoodsUtils.getDirichletPrior(knownCounts,2)),Arrays.toString(post)));
Arrays.toString(PosteriorLikelihoodsUtils.getDirichletPrior(knownCounts,2,false)),Arrays.toString(post)));
}
}
testIndex++;
@ -466,17 +497,17 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
double expected_five[] = new double[] { -9.170334, -5.175724, -6.767055, -0.8250021, -5.126027, -0.07628661, -3.276762,
-3.977787, -2.227065, -4.57769, -5.494041, -2.995066, -7.444344, -7.096104, -2.414187};
double[] post1 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_one),counts_one,2);
double[] post2 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_two),counts_two,2);
double[] post3 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_three),counts_three,2);
double[] post4 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_four),counts_four,2);
double[] post5 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_five),counts_five,2);
double[] post1 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_one),counts_one,2,false);
double[] post2 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_two),counts_two,2,false);
double[] post3 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_three),counts_three,2,false);
double[] post4 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_four),counts_four,2,false);
double[] post5 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_five),counts_five,2,false);
double[] expecPrior5 = new double[] {-4.2878195, -4.2932090, -4.8845400, -1.9424874, -2.2435120, -0.1937719, -3.5942477,
-3.8952723, -1.5445506, -3.4951749, -2.6115263, -2.9125508, -0.5618292, -2.2135895,
-1.5316722};
Assert.assertTrue(arraysApproxEqual(expecPrior5, PosteriorLikelihoodsUtils.getDirichletPrior(counts_five,2),1e-5),errMsgArray(expecPrior5,PosteriorLikelihoodsUtils.getDirichletPrior(counts_five,2)));
Assert.assertTrue(arraysApproxEqual(expecPrior5, PosteriorLikelihoodsUtils.getDirichletPrior(counts_five,2,false),1e-5),errMsgArray(expecPrior5,PosteriorLikelihoodsUtils.getDirichletPrior(counts_five,2,false)));
Assert.assertTrue(arraysApproxEqual(expected_one,post1,1e-6),errMsgArray(expected_one,post1));
Assert.assertTrue(arraysApproxEqual(expected_two,post2,1e-5),errMsgArray(expected_two,post2));