Fix to issue encountered when running HaplotypeCaller in GGA mode with data from other 1000G callers.
In particular, someone produced a tandem repeat site with 57 alt alleles (sic) which made the caller blow up. Inelegant fix is to detect if # of alleles is > our max cached capacity, and if so, emit an informative warning and skip site. -- Added unit test to UG engine to cover this case. -- Commit to posterity private scala script currently used for 1000G indel consensus (still very much subject to changes). GSA-878 #resolve
This commit is contained in:
parent
470746c907
commit
ea01dbf130
|
|
@ -385,11 +385,23 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;
|
||||
|
||||
// TODO TODO TODO TODO
|
||||
// REFACTOR THIS FUNCTION, TOO UNWIELDY!!
|
||||
|
||||
// initialize the data for this thread if that hasn't been done yet
|
||||
if ( afcm.get() == null ) {
|
||||
afcm.set(AFCalcFactory.createAFCalc(UAC, N, logger));
|
||||
}
|
||||
|
||||
// if input VC can't be genotyped, exit with either null VCC or, in case where we need to emit all sites, an empty call
|
||||
if (!canVCbeGenotyped(vc)) {
|
||||
if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && !limitedContext)
|
||||
return generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext);
|
||||
else
|
||||
return null;
|
||||
|
||||
}
|
||||
|
||||
// estimate our confidence in a reference call and return
|
||||
if ( vc.getNSamples() == 0 ) {
|
||||
if ( limitedContext )
|
||||
|
|
@ -544,6 +556,23 @@ public class UnifiedGenotyperEngine {
|
|||
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PoFGT0));
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine whether input VC to calculateGenotypes() can be genotyped and AF can be computed.
|
||||
* @param vc Input VC
|
||||
* @return Status check
|
||||
*/
|
||||
@Requires("vc != null")
|
||||
protected boolean canVCbeGenotyped(final VariantContext vc) {
|
||||
// protect against too many alternate alleles that we can't even run AF on:
|
||||
if (vc.getNAlleles()> GenotypeLikelihoods.MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED) {
|
||||
logger.warn("Attempting to genotype more than "+GenotypeLikelihoods.MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED +
|
||||
" alleles. Site will be skipped at location "+vc.getChr()+":"+vc.getStart());
|
||||
return false;
|
||||
}
|
||||
else return true;
|
||||
|
||||
}
|
||||
|
||||
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
|
||||
if ( !BaseUtils.isRegularBase(refContext.getBase()) )
|
||||
|
|
|
|||
|
|
@ -50,10 +50,16 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
// the imports for unit testing.
|
||||
|
||||
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypeLikelihoods;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
|
|
@ -102,4 +108,23 @@ public class UnifiedGenotyperEngineUnitTest extends BaseTest {
|
|||
Assert.assertTrue(MathUtils.goodLog10Probability(ref), "Reference calculation wasn't a well formed log10 prob " + ref);
|
||||
Assert.assertEquals(ref, expected, TOLERANCE, "Failed reference confidence for single sample");
|
||||
}
|
||||
|
||||
@Test(enabled=true)
|
||||
public void testTooManyAlleles() {
|
||||
|
||||
for ( Integer numAltAlleles = 0; numAltAlleles < 100; numAltAlleles++ ) {
|
||||
|
||||
Set<Allele> alleles = new HashSet<Allele>();
|
||||
alleles.add(Allele.create("A", true)); // ref allele
|
||||
|
||||
for (int len = 1; len <=numAltAlleles; len++) {
|
||||
// add alt allele of length len+1
|
||||
alleles.add(Allele.create(Utils.dupString('A', len + 1), false));
|
||||
}
|
||||
final VariantContext vc = new VariantContextBuilder("test", "chr1", 1000, 1000, alleles).make();
|
||||
final boolean result = ugEngine.canVCbeGenotyped(vc);
|
||||
Assert.assertTrue(result == (vc.getNAlleles()<= GenotypeLikelihoods.MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue