Add error handling to CalculateGenotypePosteriors to catch multiallelic variants with wrong number of ACs
-- throws UserException; added tests in PosteriorLikelihoodsUtilsUnitTests Add error handling to CalculateGenotypePosteriors for cases where MLEAC>AN; add tests in PosteriorLikelihoodsUtilsUnitTests Add unit tests to confirm that CalculateGenotypePosteriors has the ability to switch genotypes for four cases
This commit is contained in:
parent
7f9f58dbd1
commit
43fdd38342
|
|
@ -48,6 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
|
|||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
|
|
@ -211,13 +212,13 @@ public class PosteriorLikelihoodsUtils {
|
|||
final int[] ac;
|
||||
//use MLEAC value...
|
||||
if ( context.hasAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY) && ! useAC ) {
|
||||
ac = extractInts(context.getAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY));
|
||||
ac = getAlleleCounts(VCFConstants.MLE_ALLELE_COUNT_KEY, context);
|
||||
}
|
||||
//...unless specified by the user in useAC
|
||||
//...unless specified by the user in useAC or unless MLEAC is absent
|
||||
else if ( context.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
|
||||
ac = extractInts(context.getAttribute(VCFConstants.ALLELE_COUNT_KEY));
|
||||
ac = getAlleleCounts(VCFConstants.ALLELE_COUNT_KEY, context);
|
||||
}
|
||||
//if VariantContext annotation doesn't contain AC/MLEAC then get the data from another field
|
||||
//if VariantContext annotation doesn't contain AC or MLEAC then get the data from direct evaluation
|
||||
else {
|
||||
ac = new int[context.getAlternateAlleles().size()];
|
||||
int idx = 0;
|
||||
|
|
@ -248,6 +249,25 @@ public class PosteriorLikelihoodsUtils {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve allele count data from VariantContext using VCFkey, checks for correct number of values in VCF
|
||||
* @param VCFkey VariantContext annotation tag of interest (should be AC or MLEAC)
|
||||
* @param context VariantContext from which to extract the data
|
||||
* @return int[] with allele count data
|
||||
*/
|
||||
private static int[] getAlleleCounts(final String VCFkey, final VariantContext context) {
|
||||
final Object alleleCountsFromVCF = context.getAttribute(VCFkey);
|
||||
if ( alleleCountsFromVCF instanceof List ) {
|
||||
if ( ((List) alleleCountsFromVCF).size() != context.getAlternateAlleles().size() )
|
||||
throw new UserException(String.format("Variant does not contain the same number of MLE allele counts as alternate alleles for record at %s:%d", context.getChr(), context.getStart()));
|
||||
}
|
||||
else if ( alleleCountsFromVCF instanceof String || alleleCountsFromVCF instanceof Integer) {//here length is 1
|
||||
if (context.getAlternateAlleles().size() != 1)
|
||||
throw new UserException(String.format("Variant does not contain the same number of MLE allele counts as alternate alleles for record at %s:%d", context.getChr(), context.getStart()));
|
||||
}
|
||||
return extractInts(alleleCountsFromVCF);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check the formatting on the Object returned by a call to VariantContext::getAttribute() and parse appropriately
|
||||
* @param integerListContainingVCField - Object returned by a call to VariantContext::getAttribute()
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
|
|||
* Date: 12/8/13
|
||||
*/
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
|
@ -192,6 +193,101 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
Assert.assertEquals(arraysEq(test2exp1.getPL(),_mleparse((List<Integer>) test2result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY))), "");
|
||||
}
|
||||
|
||||
@Test
|
||||
private void testCalculatePosteriorHOM_VARtoHET() {
|
||||
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,1,0));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
|
||||
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
|
||||
Assert.assertTrue(GP[2] > GP[1]);
|
||||
}
|
||||
|
||||
@Test
|
||||
private void testCalculatePosteriorHETtoHOM_VAR() {
|
||||
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,0,1));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,900).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
|
||||
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
|
||||
Assert.assertTrue(GP[2] < GP[1]);
|
||||
}
|
||||
|
||||
@Test
|
||||
private void testCalculatePosteriorHOM_REFtoHET() {
|
||||
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,0,1,40));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
|
||||
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
|
||||
Assert.assertTrue(GP[0] > GP[1]);
|
||||
}
|
||||
|
||||
@Test
|
||||
private void testCalculatePosteriorHETtoHOM_REF() {
|
||||
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,1,0,40));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,100).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
|
||||
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(VCFConstants.GENOTYPE_POSTERIORS_KEY));
|
||||
Assert.assertTrue(GP[0] < GP[1]);
|
||||
}
|
||||
|
||||
@Test
|
||||
private void testMLEACgreaterThanAN() {
|
||||
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
|
||||
makeG("s2",Aref,T,18,0,24),
|
||||
makeG("s3",Aref,T,22,0,12));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,11).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
}
|
||||
|
||||
@Test (expectedExceptions = {UserException.class})
|
||||
private void testWrongNumberACvalues() {
|
||||
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
|
||||
makeG("s2",Aref,T,18,0,24),
|
||||
makeG("s3",Aref,T,22,0,12));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
}
|
||||
|
||||
@Test (expectedExceptions = {UserException.class})
|
||||
private void testWrongNumberMLEACvalues() {
|
||||
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
|
||||
makeG("s2",Aref,T,18,0,24),
|
||||
makeG("s3",Aref,T,22,0,12));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
}
|
||||
|
||||
@Test
|
||||
private void testMultipleACvalues() {
|
||||
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
|
||||
makeG("s2",Aref,T,18,0,24),
|
||||
makeG("s3",Aref,T,22,0,12));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
}
|
||||
|
||||
@Test
|
||||
private void testMultipleMLEACvalues() {
|
||||
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,20,0),
|
||||
makeG("s2",Aref,T,18,0,24),
|
||||
makeG("s3",Aref,T,22,0,12));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
}
|
||||
|
||||
private double[] pl2gl(int[] pl) {
|
||||
double[] gl = new double[pl.length];
|
||||
for ( int idx = 0; idx < gl.length; idx++ ) {
|
||||
|
|
|
|||
|
|
@ -42,9 +42,9 @@ public class ConcordanceMetrics {
|
|||
private Map<String,GenotypeConcordanceTable> perSampleGenotypeConcordance;
|
||||
private GenotypeConcordanceTable overallGenotypeConcordance;
|
||||
private SiteConcordanceTable overallSiteConcordance;
|
||||
private Boolean printInterestingSites;
|
||||
private boolean printInterestingSites;
|
||||
|
||||
public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth, Boolean printSitesEnabled) {
|
||||
public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth, boolean printSitesEnabled) {
|
||||
HashSet<String> overlappingSamples = new HashSet<String>(evaluate.getGenotypeSamples());
|
||||
overlappingSamples.retainAll(truth.getGenotypeSamples());
|
||||
perSampleGenotypeConcordance = new HashMap<String, GenotypeConcordanceTable>(overlappingSamples.size());
|
||||
|
|
@ -116,7 +116,7 @@ public class ConcordanceMetrics {
|
|||
|
||||
@Requires({"eval != null","truth != null"})
|
||||
public void update(VariantContext eval, VariantContext truth) {
|
||||
Boolean doPrint = false;
|
||||
boolean doPrint = false;
|
||||
overallSiteConcordance.update(eval,truth);
|
||||
Set<String> alleleTruth = new HashSet<String>(8);
|
||||
String truthRef = truth.getReference().getBaseString();
|
||||
|
|
|
|||
Loading…
Reference in New Issue