Add error handling to CalculateGenotypePosteriors to catch multiallelic variants with wrong number of ACs

-- throws UserException; added tests in PosteriorLikelihoodsUtilsUnitTests
Add error handling to CalculateGenotypePosteriors for cases where MLEAC>AN; add tests in PosteriorLikelihoodsUtilsUnitTests
Add unit tests to confirm that CalculateGenotypePosteriors has the ability to switch genotypes for four cases
This commit is contained in:
Laura Gauthier 2014-02-18 11:44:19 -05:00
parent 7f9f58dbd1
commit 43fdd38342
3 changed files with 123 additions and 7 deletions

View File

@ -48,6 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.vcf.VCFConstants;
@ -211,13 +212,13 @@ public class PosteriorLikelihoodsUtils {
final int[] ac;
//use MLEAC value...
if ( context.hasAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY) && ! useAC ) {
ac = extractInts(context.getAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY));
ac = getAlleleCounts(VCFConstants.MLE_ALLELE_COUNT_KEY, context);
}
//...unless specified by the user in useAC
//...unless specified by the user in useAC or unless MLEAC is absent
else if ( context.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
ac = extractInts(context.getAttribute(VCFConstants.ALLELE_COUNT_KEY));
ac = getAlleleCounts(VCFConstants.ALLELE_COUNT_KEY, context);
}
//if VariantContext annotation doesn't contain AC/MLEAC then get the data from another field
//if VariantContext annotation doesn't contain AC or MLEAC then get the data from direct evaluation
else {
ac = new int[context.getAlternateAlleles().size()];
int idx = 0;
@ -248,6 +249,25 @@ public class PosteriorLikelihoodsUtils {
}
}
/**
* Retrieve allele count data from VariantContext using VCFkey, checks for correct number of values in VCF
* @param VCFkey VariantContext annotation tag of interest (should be AC or MLEAC)
* @param context VariantContext from which to extract the data
* @return int[] with allele count data
*/
private static int[] getAlleleCounts(final String VCFkey, final VariantContext context) {
final Object alleleCountsFromVCF = context.getAttribute(VCFkey);
if ( alleleCountsFromVCF instanceof List ) {
if ( ((List) alleleCountsFromVCF).size() != context.getAlternateAlleles().size() )
throw new UserException(String.format("Variant does not contain the same number of MLE allele counts as alternate alleles for record at %s:%d", context.getChr(), context.getStart()));
}
else if ( alleleCountsFromVCF instanceof String || alleleCountsFromVCF instanceof Integer) {//here length is 1
if (context.getAlternateAlleles().size() != 1)
throw new UserException(String.format("Variant does not contain the same number of MLE allele counts as alternate alleles for record at %s:%d", context.getChr(), context.getStart()));
}
return extractInts(alleleCountsFromVCF);
}
/**
* Check the formatting on the Object returned by a call to VariantContext::getAttribute() and parse appropriately
* @param integerListContainingVCField - Object returned by a call to VariantContext::getAttribute()

View File

@ -52,6 +52,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
* Date: 12/8/13
*/
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.variant.variantcontext.*;
@ -192,6 +193,101 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
Assert.assertEquals(arraysEq(test2exp1.getPL(),_mleparse((List<Integer>) test2result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY))), "");
}
@Test
private void testCalculatePosteriorHOM_VARtoHET() {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,1,0));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
Assert.assertTrue(GP[2] > GP[1]);
}
@Test
private void testCalculatePosteriorHETtoHOM_VAR() {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,0,1));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,900).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
Assert.assertTrue(GP[2] < GP[1]);
}
@Test
private void testCalculatePosteriorHOM_REFtoHET() {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,0,1,40));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
Assert.assertTrue(GP[0] > GP[1]);
}
@Test
private void testCalculatePosteriorHETtoHOM_REF() {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,1,0,40));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,100).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
Assert.assertTrue(GP[0] < GP[1]);
}
@Test
private void testMLEACgreaterThanAN() {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
makeG("s2",Aref,T,18,0,24),
makeG("s3",Aref,T,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,11).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
}
@Test (expectedExceptions = {UserException.class})
private void testWrongNumberACvalues() {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
makeG("s2",Aref,T,18,0,24),
makeG("s3",Aref,T,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
}
@Test (expectedExceptions = {UserException.class})
private void testWrongNumberMLEACvalues() {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
makeG("s2",Aref,T,18,0,24),
makeG("s3",Aref,T,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
}
@Test
private void testMultipleACvalues() {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
makeG("s2",Aref,T,18,0,24),
makeG("s3",Aref,T,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
}
@Test
private void testMultipleMLEACvalues() {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
makeG("s2",Aref,T,18,0,24),
makeG("s3",Aref,T,22,0,12));
List<VariantContext> supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
}
private double[] pl2gl(int[] pl) {
double[] gl = new double[pl.length];
for ( int idx = 0; idx < gl.length; idx++ ) {

View File

@ -42,9 +42,9 @@ public class ConcordanceMetrics {
private Map<String,GenotypeConcordanceTable> perSampleGenotypeConcordance;
private GenotypeConcordanceTable overallGenotypeConcordance;
private SiteConcordanceTable overallSiteConcordance;
private Boolean printInterestingSites;
private boolean printInterestingSites;
public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth, Boolean printSitesEnabled) {
public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth, boolean printSitesEnabled) {
HashSet<String> overlappingSamples = new HashSet<String>(evaluate.getGenotypeSamples());
overlappingSamples.retainAll(truth.getGenotypeSamples());
perSampleGenotypeConcordance = new HashMap<String, GenotypeConcordanceTable>(overlappingSamples.size());
@ -116,7 +116,7 @@ public class ConcordanceMetrics {
@Requires({"eval != null","truth != null"})
public void update(VariantContext eval, VariantContext truth) {
Boolean doPrint = false;
boolean doPrint = false;
overallSiteConcordance.update(eval,truth);
Set<String> alleleTruth = new HashSet<String>(8);
String truthRef = truth.getReference().getBaseString();