Fix for an issue encountered when running HaplotypeCaller in GGA mode with data from other 1000G callers.
In particular, someone produced a tandem repeat site with 57 alt alleles (sic), which made the caller blow up. The inelegant fix is to detect whether the number of alleles exceeds our maximum cached capacity and, if so, emit an informative warning and skip the site. -- Added a unit test to the UG engine to cover this case. -- Committed to posterity the private Scala script currently used for the 1000G indel consensus (still very much subject to change). GSA-878 #resolve
This commit is contained in:
parent
470746c907
commit
ea01dbf130
|
|
@ -385,11 +385,23 @@ public class UnifiedGenotyperEngine {
|
||||||
|
|
||||||
boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;
|
boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;
|
||||||
|
|
||||||
|
// TODO TODO TODO TODO
|
||||||
|
// REFACTOR THIS FUNCTION, TOO UNWIELDY!!
|
||||||
|
|
||||||
// initialize the data for this thread if that hasn't been done yet
|
// initialize the data for this thread if that hasn't been done yet
|
||||||
if ( afcm.get() == null ) {
|
if ( afcm.get() == null ) {
|
||||||
afcm.set(AFCalcFactory.createAFCalc(UAC, N, logger));
|
afcm.set(AFCalcFactory.createAFCalc(UAC, N, logger));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// if input VC can't be genotyped, exit with either null VCC or, in case where we need to emit all sites, an empty call
|
||||||
|
if (!canVCbeGenotyped(vc)) {
|
||||||
|
if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && !limitedContext)
|
||||||
|
return generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext);
|
||||||
|
else
|
||||||
|
return null;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
// estimate our confidence in a reference call and return
|
// estimate our confidence in a reference call and return
|
||||||
if ( vc.getNSamples() == 0 ) {
|
if ( vc.getNSamples() == 0 ) {
|
||||||
if ( limitedContext )
|
if ( limitedContext )
|
||||||
|
|
@ -544,6 +556,23 @@ public class UnifiedGenotyperEngine {
|
||||||
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PoFGT0));
|
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PoFGT0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determine whether input VC to calculateGenotypes() can be genotyped and AF can be computed.
|
||||||
|
* @param vc Input VC
|
||||||
|
* @return Status check
|
||||||
|
*/
|
||||||
|
@Requires("vc != null")
|
||||||
|
protected boolean canVCbeGenotyped(final VariantContext vc) {
|
||||||
|
// protect against too many alternate alleles that we can't even run AF on:
|
||||||
|
if (vc.getNAlleles()> GenotypeLikelihoods.MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED) {
|
||||||
|
logger.warn("Attempting to genotype more than "+GenotypeLikelihoods.MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED +
|
||||||
|
" alleles. Site will be skipped at location "+vc.getChr()+":"+vc.getStart());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else return true;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) {
|
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||||
|
|
||||||
if ( !BaseUtils.isRegularBase(refContext.getBase()) )
|
if ( !BaseUtils.isRegularBase(refContext.getBase()) )
|
||||||
|
|
|
||||||
|
|
@ -50,10 +50,16 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
// the imports for unit testing.
|
// the imports for unit testing.
|
||||||
|
|
||||||
|
|
||||||
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.variant.variantcontext.Allele;
|
||||||
|
import org.broadinstitute.variant.variantcontext.GenotypeLikelihoods;
|
||||||
|
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.BeforeClass;
|
import org.testng.annotations.BeforeClass;
|
||||||
import org.testng.annotations.BeforeMethod;
|
import org.testng.annotations.BeforeMethod;
|
||||||
|
|
@ -102,4 +108,23 @@ public class UnifiedGenotyperEngineUnitTest extends BaseTest {
|
||||||
Assert.assertTrue(MathUtils.goodLog10Probability(ref), "Reference calculation wasn't a well formed log10 prob " + ref);
|
Assert.assertTrue(MathUtils.goodLog10Probability(ref), "Reference calculation wasn't a well formed log10 prob " + ref);
|
||||||
Assert.assertEquals(ref, expected, TOLERANCE, "Failed reference confidence for single sample");
|
Assert.assertEquals(ref, expected, TOLERANCE, "Failed reference confidence for single sample");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(enabled=true)
|
||||||
|
public void testTooManyAlleles() {
|
||||||
|
|
||||||
|
for ( Integer numAltAlleles = 0; numAltAlleles < 100; numAltAlleles++ ) {
|
||||||
|
|
||||||
|
Set<Allele> alleles = new HashSet<Allele>();
|
||||||
|
alleles.add(Allele.create("A", true)); // ref allele
|
||||||
|
|
||||||
|
for (int len = 1; len <=numAltAlleles; len++) {
|
||||||
|
// add alt allele of length len+1
|
||||||
|
alleles.add(Allele.create(Utils.dupString('A', len + 1), false));
|
||||||
|
}
|
||||||
|
final VariantContext vc = new VariantContextBuilder("test", "chr1", 1000, 1000, alleles).make();
|
||||||
|
final boolean result = ugEngine.canVCbeGenotyped(vc);
|
||||||
|
Assert.assertTrue(result == (vc.getNAlleles()<= GenotypeLikelihoods.MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue