Improvements to CalculateGenotypePosteriors and CalibrateGenotypeLikelihoods

CalculateGenotypePosteriors now only computes posterior probs for SNP sites with SNP priors
(other sites have flat priors applied)

CalibrateGenotypeLikelihoods had originally applied HOM_REF/HET/HOM_VAR frequencies in callset as priors before empirical quality analysis. Now has option (-noPriors) to not apply/apply flat priors. Also takes in new external probabilities files, such as those generated by CGP, from which the genotype posterior probability qualities will be read.

Integration test was changed to account for new SNP-only behavior and default behavior to not use missing priors.

(Also, new numRefIfMissing is 0, which should only matter in cases using few samples when you probably don't want to be doing that anyway!)
This commit is contained in:
Laura Gauthier 2014-03-17 08:51:02 -04:00
parent 92a3aa35d5
commit 9f3cbb2ef1
4 changed files with 141 additions and 54 deletions

View File

@ -108,6 +108,14 @@ import java.util.*;
* 3) Per-site genotype priors added to the INFO field ("PG") * 3) Per-site genotype priors added to the INFO field ("PG")
* </p> * </p>
* *
* <h3>Notes</h3>
* <p>
* Currently, priors will only be applied for SNP sites in the input callset (and only those that have a SNP at the
* matching site in the priors VCF unless the --calculateMissingPriors flag is used).
* If the site is not called in the priors, flat priors will be applied. Flat priors are also applied for any non-SNP
* sites in the input callset.
* </p>
*
* <h3>Examples</h3> * <h3>Examples</h3>
* <pre> * <pre>
* Inform the genotype assignment of NA12878 using the 1000G Euro panel * Inform the genotype assignment of NA12878 using the 1000G Euro panel
@ -180,7 +188,7 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
*/ */
@Argument(fullName="numRefSamplesIfNoCall",shortName="nrs",doc="The number of homozygous reference to infer were " + @Argument(fullName="numRefSamplesIfNoCall",shortName="nrs",doc="The number of homozygous reference to infer were " +
"seen at a position where an \"other callset\" contains no site or genotype information",required=false) "seen at a position where an \"other callset\" contains no site or genotype information",required=false)
public int numRefIfMissing = 1; public int numRefIfMissing = 0;
/** /**
* Rather than looking for the MLEAC field first, and then falling back to AC; first look for the AC field and then * Rather than looking for the MLEAC field first, and then falling back to AC; first look for the AC field and then
@ -198,11 +206,15 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
"related individuals.",required=false) "related individuals.",required=false)
public boolean ignoreInputSamples = false; public boolean ignoreInputSamples = false;
/**
* Calculate priors for missing external variants from sample data -- default behavior is to apply flat priors
*/
@Argument(fullName="calculateMissingPriors",shortName="calcMissing",doc="Use discovered allele frequency in the callset for variants that do no appear in the external callset", required=false)
public boolean calcMissing = false;
@Output(doc="File to which variants should be written") @Output(doc="File to which variants should be written")
protected VariantContextWriter vcfWriter = null; protected VariantContextWriter vcfWriter = null;
private final boolean NO_EM = false;
public void initialize() { public void initialize() {
// Get list of samples to include in the output // Get list of samples to include in the output
final List<String> rodNames = Arrays.asList(variantCollection.variants.getName()); final List<String> rodNames = Arrays.asList(variantCollection.variants.getName());
@ -254,7 +266,7 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
final int missing = supportVariants.size() - otherVCs.size(); final int missing = supportVariants.size() - otherVCs.size();
for ( VariantContext vc : vcs ) { for ( VariantContext vc : vcs ) {
vcfWriter.add(PosteriorLikelihoodsUtils.calculatePosteriorGLs(vc, otherVCs, missing * numRefIfMissing, globalPrior, !ignoreInputSamples, NO_EM, defaultToAC)); vcfWriter.add(PosteriorLikelihoodsUtils.calculatePosteriorGLs(vc, otherVCs, missing * numRefIfMissing, globalPrior, !ignoreInputSamples, defaultToAC, calcMissing));
} }
return 1; return 1;

View File

@ -62,15 +62,20 @@ public class PosteriorLikelihoodsUtils {
final int numRefSamplesFromMissingResources, final int numRefSamplesFromMissingResources,
final double globalFrequencyPriorDirichlet, final double globalFrequencyPriorDirichlet,
final boolean useInputSamples, final boolean useInputSamples,
final boolean useEM, final boolean useAC,
final boolean useAC) { final boolean calcMissing) {
if ( useEM )
throw new IllegalArgumentException("EM loop for posterior GLs not yet implemented");
final Map<Allele,Integer> totalAlleleCounts = new HashMap<>(); final Map<Allele,Integer> totalAlleleCounts = new HashMap<>();
boolean nonSNPprior = false;
if (vc1 == null) throw new IllegalArgumentException("VariantContext vc1 is null");
final boolean nonSNPeval = !vc1.isSNP();
final double[] alleleCounts = new double[vc1.getNAlleles()];
if(!nonSNPeval)
{
//store the allele counts for each allele in the variant priors //store the allele counts for each allele in the variant priors
for ( final VariantContext resource : resources ) { for ( final VariantContext resource : resources ) {
if( !resource.isSNP()) nonSNPprior = true;
addAlleleCounts(totalAlleleCounts,resource,useAC); addAlleleCounts(totalAlleleCounts,resource,useAC);
} }
@ -81,9 +86,9 @@ public class PosteriorLikelihoodsUtils {
//add zero allele counts for any reference alleles not seen in priors (if applicable) //add zero allele counts for any reference alleles not seen in priors (if applicable)
totalAlleleCounts.put(vc1.getReference(),totalAlleleCounts.get(vc1.getReference())+numRefSamplesFromMissingResources); totalAlleleCounts.put(vc1.getReference(),totalAlleleCounts.get(vc1.getReference())+numRefSamplesFromMissingResources);
}
// now extract the counts of the alleles present within vc1, and in order // now extract the counts of the alleles present within vc1, and in order
final double[] alleleCounts = new double[vc1.getNAlleles()];
int alleleIndex = 0; int alleleIndex = 0;
for ( final Allele allele : vc1.getAlleles() ) { for ( final Allele allele : vc1.getAlleles() ) {
@ -91,13 +96,17 @@ public class PosteriorLikelihoodsUtils {
totalAlleleCounts.get(allele) : 0 ); totalAlleleCounts.get(allele) : 0 );
} }
//parse the likelihoods for each sample's genotype //parse the likelihoods for each sample's genotype
final List<double[]> likelihoods = new ArrayList<>(vc1.getNSamples()); final List<double[]> likelihoods = new ArrayList<>(vc1.getNSamples());
for ( final Genotype genotype : vc1.getGenotypes() ) { for ( final Genotype genotype : vc1.getGenotypes() ) {
likelihoods.add(genotype.hasLikelihoods() ? genotype.getLikelihoods().getAsVector() : null ); likelihoods.add(genotype.hasLikelihoods() ? genotype.getLikelihoods().getAsVector() : null );
} }
final List<double[]> posteriors = calculatePosteriorGLs(likelihoods,alleleCounts,vc1.getMaxPloidy(2)); //TODO: for now just use priors that are SNPs because indel priors will bias SNP calls
final boolean useFlatPriors = nonSNPeval || nonSNPprior || (resources.isEmpty() && !calcMissing);
final List<double[]> posteriors = calculatePosteriorGLs(likelihoods,alleleCounts,vc1.getMaxPloidy(2), useFlatPriors);
final GenotypesContext newContext = GenotypesContext.create(); final GenotypesContext newContext = GenotypesContext.create();
for ( int genoIdx = 0; genoIdx < vc1.getNSamples(); genoIdx ++ ) { for ( int genoIdx = 0; genoIdx < vc1.getNSamples(); genoIdx ++ ) {
@ -112,7 +121,7 @@ public class PosteriorLikelihoodsUtils {
} }
final List<Integer> priors = Utils.listFromPrimitives( final List<Integer> priors = Utils.listFromPrimitives(
GenotypeLikelihoods.fromLog10Likelihoods(getDirichletPrior(alleleCounts, vc1.getMaxPloidy(2))).getAsPLs()); GenotypeLikelihoods.fromLog10Likelihoods(getDirichletPrior(alleleCounts, vc1.getMaxPloidy(2),useFlatPriors)).getAsPLs());
final VariantContextBuilder builder = new VariantContextBuilder(vc1).genotypes(newContext).attribute("PG", priors); final VariantContextBuilder builder = new VariantContextBuilder(vc1).genotypes(newContext).attribute("PG", priors);
// add in the AC, AF, and AN attributes // add in the AC, AF, and AN attributes
@ -126,16 +135,18 @@ public class PosteriorLikelihoodsUtils {
* @param genotypeLikelihoods - the genotype likelihoods for the individual * @param genotypeLikelihoods - the genotype likelihoods for the individual
* @param knownAlleleCountsByAllele - the known allele counts in the population. For AC=2 AN=12 site, this is {10,2} * @param knownAlleleCountsByAllele - the known allele counts in the population. For AC=2 AN=12 site, this is {10,2}
* @param ploidy - the ploidy to assume * @param ploidy - the ploidy to assume
* @param useFlatPriors - if true, apply flat priors to likelihoods in order to calculate posterior probabilities
* @return - the posterior genotype likelihoods * @return - the posterior genotype likelihoods
*/ */
protected static List<double[]> calculatePosteriorGLs(final List<double[]> genotypeLikelihoods, protected static List<double[]> calculatePosteriorGLs(final List<double[]> genotypeLikelihoods,
final double[] knownAlleleCountsByAllele, final double[] knownAlleleCountsByAllele,
final int ploidy) { final int ploidy,
final boolean useFlatPriors) {
if ( ploidy != 2 ) { if ( ploidy != 2 ) {
throw new IllegalStateException("Genotype posteriors not yet implemented for ploidy != 2"); throw new IllegalStateException("Genotype posteriors not yet implemented for ploidy != 2");
} }
final double[] genotypePriorByAllele = getDirichletPrior(knownAlleleCountsByAllele,ploidy); final double[] genotypePriorByAllele = getDirichletPrior(knownAlleleCountsByAllele,ploidy, useFlatPriors);
final List<double[]> posteriors = new ArrayList<>(genotypeLikelihoods.size()); final List<double[]> posteriors = new ArrayList<>(genotypeLikelihoods.size());
for ( final double[] likelihoods : genotypeLikelihoods ) { for ( final double[] likelihoods : genotypeLikelihoods ) {
double[] posteriorProbabilities = null; double[] posteriorProbabilities = null;
@ -164,8 +175,9 @@ public class PosteriorLikelihoodsUtils {
// convenience function for a single genotypelikelihoods array. Just wraps. // convenience function for a single genotypelikelihoods array. Just wraps.
protected static double[] calculatePosteriorGLs(final double[] genotypeLikelihoods, protected static double[] calculatePosteriorGLs(final double[] genotypeLikelihoods,
final double[] knownAlleleCountsByAllele, final double[] knownAlleleCountsByAllele,
final int ploidy) { final int ploidy,
return calculatePosteriorGLs(Arrays.asList(genotypeLikelihoods),knownAlleleCountsByAllele,ploidy).get(0); final boolean useFlatPriors) {
return calculatePosteriorGLs(Arrays.asList(genotypeLikelihoods),knownAlleleCountsByAllele,ploidy, useFlatPriors).get(0);
} }
@ -180,7 +192,7 @@ public class PosteriorLikelihoodsUtils {
* @param ploidy - the number of chromosomes in the sample. For now restricted to 2. * @param ploidy - the number of chromosomes in the sample. For now restricted to 2.
* @return - the Dirichlet-Multinomial distribution over genotype states * @return - the Dirichlet-Multinomial distribution over genotype states
*/ */
protected static double[] getDirichletPrior(final double[] knownCountsByAllele, final int ploidy) { protected static double[] getDirichletPrior(final double[] knownCountsByAllele, final int ploidy, final boolean useFlatPrior) {
if ( ploidy != 2 ) { if ( ploidy != 2 ) {
throw new IllegalStateException("Genotype priors not yet implemented for ploidy != 2"); throw new IllegalStateException("Genotype priors not yet implemented for ploidy != 2");
} }
@ -192,12 +204,16 @@ public class PosteriorLikelihoodsUtils {
int priorIndex = 0; int priorIndex = 0;
for ( int allele2 = 0; allele2 < knownCountsByAllele.length; allele2++ ) { for ( int allele2 = 0; allele2 < knownCountsByAllele.length; allele2++ ) {
for ( int allele1 = 0; allele1 <= allele2; allele1++) { for ( int allele1 = 0; allele1 <= allele2; allele1++) {
if (useFlatPrior)
priors[priorIndex++] = 1.0;
else {
final int[] counts = new int[knownCountsByAllele.length]; final int[] counts = new int[knownCountsByAllele.length];
counts[allele1] += 1; counts[allele1] += 1;
counts[allele2] += 1; counts[allele2] += 1;
priors[priorIndex++] = MathUtils.dirichletMultinomial(knownCountsByAllele,sumOfKnownCounts,counts,ploidy); priors[priorIndex++] = MathUtils.dirichletMultinomial(knownCountsByAllele,sumOfKnownCounts,counts,ploidy);
} }
} }
}
return priors; return priors;
} }

View File

@ -55,6 +55,19 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest {
@Test(enabled = true) @Test(enabled = true)
public void testUsingDiscoveredAF() { public void testUsingDiscoveredAF() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T CalculateGenotypePosteriors --no_cmdline_in_header -calcMissing" +
" -o %s" +
" -R " + b37KGReference +
" -L 20:10,000,000-10,100,000" +
" -V " + validationDataLocation + "1000G.phase3.broad.withGenotypes.chr20.1MB.vcf",
1,
Arrays.asList("80d0eedddd215df8ab29bde908c73ca4"));
executeTest("testUsingDiscoveredAF", spec);
}
@Test(enabled = true)
public void testMissingPriors() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T CalculateGenotypePosteriors --no_cmdline_in_header" + "-T CalculateGenotypePosteriors --no_cmdline_in_header" +
" -o %s" + " -o %s" +
@ -62,8 +75,23 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest {
" -L 20:10,000,000-10,100,000" + " -L 20:10,000,000-10,100,000" +
" -V " + validationDataLocation + "1000G.phase3.broad.withGenotypes.chr20.1MB.vcf", " -V " + validationDataLocation + "1000G.phase3.broad.withGenotypes.chr20.1MB.vcf",
1, 1,
Arrays.asList("e1adedc7e1d63e384187b24b7ded4410")); Arrays.asList("f7653ef21b859b90d54a71ef4245ec85"));
executeTest("testUsingDiscoveredAF", spec); executeTest("testMissingPriors", spec);
} }
@Test(enabled = true)
public void testInputINDELs() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T CalculateGenotypePosteriors --no_cmdline_in_header" +
" -o %s" +
" -R " + b37KGReference +
" -L 20:10,000,000-10,100,000" +
" -V " + validationDataLocation + "NA12878.Jan2013.haplotypeCaller.subset.indels.vcf" +
" -VV " + validationDataLocation + "1000G.phase3.broad.withGenotypes.chr20.1MB.vcf",
1,
Arrays.asList("6dd7dcf94bfe99ddcbd477100592db89"));
executeTest("testMissingPriors", spec);
}
} }

View File

@ -137,7 +137,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s2",T,T,60,40,0), makeG("s2",T,T,60,40,0),
makeG("s3",Aref,Aref,0,30,90)); makeG("s3",Aref,Aref,0,30,90));
test1 = new VariantContextBuilder(test1).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,3).make(); test1 = new VariantContextBuilder(test1).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,3).make();
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test1, new ArrayList<VariantContext>(), 0, 0.001, true, false, false); VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test1, new ArrayList<VariantContext>(), 0, 0.001, true, false, true);
Genotype test1exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.20686, -0.03073215, -1.20686}); Genotype test1exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.20686, -0.03073215, -1.20686});
Assert.assertTrue(test1exp1.hasPL()); Assert.assertTrue(test1exp1.hasPL());
Genotype test1exp2 = makeGwithPLs("s2",T,T,new double[]{-6.000066, -3.823938, -6.557894e-05}); Genotype test1exp2 = makeGwithPLs("s2",T,T,new double[]{-6.000066, -3.823938, -6.557894e-05});
@ -155,7 +155,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,Aref,0,5,8,15,20,40), makeG("s3",Aref,Aref,0,5,8,15,20,40),
makeG("s4",C,T,80,40,12,20,0,10)); makeG("s4",C,T,80,40,12,20,0,10));
test2 = new VariantContextBuilder(test2).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,new ArrayList<Integer>(Arrays.asList(2,2))).make(); test2 = new VariantContextBuilder(test2).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,new ArrayList<Integer>(Arrays.asList(2,2))).make();
VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test2,new ArrayList<VariantContext>(),5,0.001,true,false,false); VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test2,new ArrayList<VariantContext>(),5,0.001,true,false, true);
Genotype test2exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.647372, -1.045139, -6.823193, -0.04513873, -2.198182, -9.823193}); Genotype test2exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.647372, -1.045139, -6.823193, -0.04513873, -2.198182, -9.823193});
Genotype test2exp2 = makeGwithPLs("s2",Aref,C,new double[]{-3.609957, -0.007723248, -1.785778, -3.007723, -4.660767, -8.785778}); Genotype test2exp2 = makeGwithPLs("s2",Aref,C,new double[]{-3.609957, -0.007723248, -1.785778, -3.007723, -4.660767, -8.785778});
Genotype test2exp3 = makeGwithPLs("s3",Aref,Aref,new double[] {-0.06094877, -0.9587151, -2.03677,-1.958715, -3.111759, -5.23677}); Genotype test2exp3 = makeGwithPLs("s3",Aref,Aref,new double[] {-0.06094877, -0.9587151, -2.03677,-1.958715, -3.111759, -5.23677});
@ -177,7 +177,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
supplTest1.add(makeVC("4",Arrays.asList(Aref,T), supplTest1.add(makeVC("4",Arrays.asList(Aref,T),
makeG("s_1",T,T), makeG("s_1",T,T),
makeG("s_2",Aref,T))); makeG("s_2",Aref,T)));
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false, true);
// the counts here are ref=30, alt=14 // the counts here are ref=30, alt=14
Genotype test1exp1 = makeGwithPLs("t1",T,T,new double[]{-3.370985, -1.415172, -0.01721766}); Genotype test1exp1 = makeGwithPLs("t1",T,T,new double[]{-3.370985, -1.415172, -0.01721766});
Genotype test1exp2 = makeGwithPLs("t2",Aref,T,new double[]{-1.763792, -0.007978791, -3.010024}); Genotype test1exp2 = makeGwithPLs("t2",Aref,T,new double[]{-1.763792, -0.007978791, -3.010024});
@ -188,7 +188,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
VariantContext testNonOverlapping = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,3,1,0)); VariantContext testNonOverlapping = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,3,1,0));
List<VariantContext> other = Arrays.asList(makeVC("2",Arrays.asList(Aref,C),makeG("s2",C,C,10,2,0))); List<VariantContext> other = Arrays.asList(makeVC("2",Arrays.asList(Aref,C),makeG("s2",C,C,10,2,0)));
VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testNonOverlapping,other,0,0.001,true,false,false); VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testNonOverlapping,other,0,0.001,true,false,true);
Genotype test2exp1 = makeGwithPLs("SGV",T,T,new double[]{-4.078345, -3.276502, -0.0002661066}); Genotype test2exp1 = makeGwithPLs("SGV",T,T,new double[]{-4.078345, -3.276502, -0.0002661066});
Assert.assertEquals(arraysEq(test2exp1.getPL(),_mleparse((List<Integer>) test2result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY))), ""); Assert.assertEquals(arraysEq(test2exp1.getPL(),_mleparse((List<Integer>) test2result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY))), "");
} }
@ -198,7 +198,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,1,0)); VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,1,0));
List<VariantContext> supplTest1 = new ArrayList<>(1); List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make()); supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
Assert.assertTrue(GP[2] > GP[1]); Assert.assertTrue(GP[2] > GP[1]);
@ -209,7 +209,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,0,1)); VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,0,1));
List<VariantContext> supplTest1 = new ArrayList<>(1); List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,900).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make()); supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,900).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
Assert.assertTrue(GP[2] < GP[1]); Assert.assertTrue(GP[2] < GP[1]);
@ -220,7 +220,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,0,1,40)); VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,0,1,40));
List<VariantContext> supplTest1 = new ArrayList<>(1); List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make()); supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
Assert.assertTrue(GP[0] > GP[1]); Assert.assertTrue(GP[0] > GP[1]);
@ -231,7 +231,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,1,0,40)); VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,1,0,40));
List<VariantContext> supplTest1 = new ArrayList<>(1); List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,100).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make()); supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,100).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY)); int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
Assert.assertTrue(GP[0] < GP[1]); Assert.assertTrue(GP[0] < GP[1]);
@ -244,7 +244,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,T,22,0,12)); makeG("s3",Aref,T,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1); List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,11).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,11).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
} }
@Test (expectedExceptions = {UserException.class}) @Test (expectedExceptions = {UserException.class})
@ -255,7 +255,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
List<VariantContext> supplTest1 = new ArrayList<>(1); List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
} }
@Test (expectedExceptions = {UserException.class}) @Test (expectedExceptions = {UserException.class})
@ -265,7 +265,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,T,22,0,12)); makeG("s3",Aref,T,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1); List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
} }
@Test @Test
@ -275,7 +275,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,T,22,0,12)); makeG("s3",Aref,T,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1); List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
} }
@Test @Test
@ -285,7 +285,38 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,T,22,0,12)); makeG("s3",Aref,T,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1); List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make()); supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false); VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
}
@Test
private void testInputIndel() {
VariantContext inputIndel = makeVC("1", Arrays.asList(Aref, ATC), makeG("s1",ATC,ATC,40,20,0),
makeG("s2",Aref,ATC,18,0,24),
makeG("s3",Aref,ATC,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,true);
System.out.println(test1result);
int[] GPs = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
int[] PLs = test1result.getGenotype(0).getPL();
Assert.assertEquals(PLs,GPs);
}
@Test
private void testPriorIndel() {
VariantContext inputIndel = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
makeG("s2",Aref,T,18,0,24),
makeG("s3",Aref,T,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,ATC,ATCATC))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,true);
System.out.println(test1result);
int[] GPs = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
int[] PLs = test1result.getGenotype(0).getPL();
Assert.assertEquals(PLs,GPs);
} }
private double[] pl2gl(int[] pl) { private double[] pl2gl(int[] pl) {
@ -411,14 +442,14 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
knownCounts[1] = numAlt-altCount; knownCounts[1] = numAlt-altCount;
int expected_index = 0; int expected_index = 0;
for ( int gl_index = 0; gl_index < likelihood_PLs.length; gl_index++ ) { for ( int gl_index = 0; gl_index < likelihood_PLs.length; gl_index++ ) {
double[] post = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(likelihood_PLs[gl_index]), knownCounts, 2); double[] post = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(likelihood_PLs[gl_index]), knownCounts, 2,false);
for ( int i = 0; i < post.length; i++ ) { for ( int i = 0; i < post.length; i++ ) {
double expected = expectations[testIndex][expected_index++]; double expected = expectations[testIndex][expected_index++];
double observed = Math.pow(10.0,post[i]); double observed = Math.pow(10.0,post[i]);
double err = Math.abs( (expected-observed)/expected ); double err = Math.abs( (expected-observed)/expected );
Assert.assertTrue(err < 1e-4, String.format("Counts: %s | Expected: %e | Observed: %e | pre %s | prior %s | post %s", Assert.assertTrue(err < 1e-4, String.format("Counts: %s | Expected: %e | Observed: %e | pre %s | prior %s | post %s",
Arrays.toString(knownCounts), expected,observed, Arrays.toString(pl2gl(likelihood_PLs[gl_index])), Arrays.toString(knownCounts), expected,observed, Arrays.toString(pl2gl(likelihood_PLs[gl_index])),
Arrays.toString(PosteriorLikelihoodsUtils.getDirichletPrior(knownCounts,2)),Arrays.toString(post))); Arrays.toString(PosteriorLikelihoodsUtils.getDirichletPrior(knownCounts,2,false)),Arrays.toString(post)));
} }
} }
testIndex++; testIndex++;
@ -466,17 +497,17 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
double expected_five[] = new double[] { -9.170334, -5.175724, -6.767055, -0.8250021, -5.126027, -0.07628661, -3.276762, double expected_five[] = new double[] { -9.170334, -5.175724, -6.767055, -0.8250021, -5.126027, -0.07628661, -3.276762,
-3.977787, -2.227065, -4.57769, -5.494041, -2.995066, -7.444344, -7.096104, -2.414187}; -3.977787, -2.227065, -4.57769, -5.494041, -2.995066, -7.444344, -7.096104, -2.414187};
double[] post1 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_one),counts_one,2); double[] post1 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_one),counts_one,2,false);
double[] post2 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_two),counts_two,2); double[] post2 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_two),counts_two,2,false);
double[] post3 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_three),counts_three,2); double[] post3 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_three),counts_three,2,false);
double[] post4 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_four),counts_four,2); double[] post4 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_four),counts_four,2,false);
double[] post5 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_five),counts_five,2); double[] post5 = PosteriorLikelihoodsUtils.calculatePosteriorGLs(pl2gl(PL_five),counts_five,2,false);
double[] expecPrior5 = new double[] {-4.2878195, -4.2932090, -4.8845400, -1.9424874, -2.2435120, -0.1937719, -3.5942477, double[] expecPrior5 = new double[] {-4.2878195, -4.2932090, -4.8845400, -1.9424874, -2.2435120, -0.1937719, -3.5942477,
-3.8952723, -1.5445506, -3.4951749, -2.6115263, -2.9125508, -0.5618292, -2.2135895, -3.8952723, -1.5445506, -3.4951749, -2.6115263, -2.9125508, -0.5618292, -2.2135895,
-1.5316722}; -1.5316722};
Assert.assertTrue(arraysApproxEqual(expecPrior5, PosteriorLikelihoodsUtils.getDirichletPrior(counts_five,2),1e-5),errMsgArray(expecPrior5,PosteriorLikelihoodsUtils.getDirichletPrior(counts_five,2))); Assert.assertTrue(arraysApproxEqual(expecPrior5, PosteriorLikelihoodsUtils.getDirichletPrior(counts_five,2,false),1e-5),errMsgArray(expecPrior5,PosteriorLikelihoodsUtils.getDirichletPrior(counts_five,2,false)));
Assert.assertTrue(arraysApproxEqual(expected_one,post1,1e-6),errMsgArray(expected_one,post1)); Assert.assertTrue(arraysApproxEqual(expected_one,post1,1e-6),errMsgArray(expected_one,post1));
Assert.assertTrue(arraysApproxEqual(expected_two,post2,1e-5),errMsgArray(expected_two,post2)); Assert.assertTrue(arraysApproxEqual(expected_two,post2,1e-5),errMsgArray(expected_two,post2));