Fixed the bug first by indicating the actual possible number of alternatives alleles considering the extra <NON_REF> and second by resizing the StateTracker capacity when invoked by GeneralPloidyExactAFCalc deep within its implementation of computeLog10PNonRef which is ultimatelly what get rids of the exception.

Story:

  https://www.pivotaltracker.com/story/show/74471252
This commit is contained in:
Valentin Ruano-Rubio 2014-08-19 11:44:24 -04:00
parent 78c2da1fef
commit d31c5536aa
7 changed files with 91 additions and 35 deletions

View File

@ -101,13 +101,17 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
protected final GenomeLocParser genomeLocParser;
// the model used for calculating p(non-ref)
protected ThreadLocal<AFCalc> afcm = new ThreadLocal<AFCalc>() {
protected ThreadLocal<AFCalc> afcm = getAlleleFrequencyCalculatorThreadLocal();
@Override
public AFCalc initialValue() {
return AFCalcFactory.createAFCalc(configuration, numberOfGenomes, logger);
}
};
protected ThreadLocal<AFCalc> getAlleleFrequencyCalculatorThreadLocal() {
return new ThreadLocal<AFCalc>() {
@Override
public AFCalc initialValue() {
return AFCalcFactory.createAFCalc(configuration, numberOfGenomes, false, logger);
}
};
}
/**

View File

@ -151,7 +151,7 @@ public class AFCalcFactory {
* @return an initialized AFCalc
*/
public static AFCalc createAFCalc(final StandardCallerArgumentCollection UAC,
final int nSamples,
final int nSamples, final boolean emitConfModel,
final Logger logger) {
final Calculation afCalculationModel = Calculation.getBestModel(UAC.genotypeArgs.samplePloidy,UAC.genotypeArgs.MAX_ALTERNATE_ALLELES,
UAC.requestedAlleleFrequencyCalculationModel);

View File

@ -163,18 +163,20 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
final ExactACset set = new ExactACset(1, new ExactACcounts(zeroCounts));
set.getLog10Likelihoods()[0] = 0.0;
final int genotypeCount = genotypeLikelihoods.size();
combinedPoolLikelihoods.add(set);
if ( genotypeLikelihoods.size() <= 1 ) {
getStateTracker().reset(numAlleles - 1);
if ( genotypeCount <= 1 ) {
// no meaningful GLs at all, just set the tracker to non poly values
getStateTracker().reset(); // just mimic-ing call below
// just mimic-ing call below
getStateTracker().setLog10LikelihoodOfAFzero(0.0);
} else {
for (int p=1; p<genotypeLikelihoods.size(); p++) {
getStateTracker().reset(); // TODO -- why is this here? It makes it hard to track the n evaluation
for (int p=1; p<genotypeCount; p++) {
combinedPoolLikelihoods = fastCombineMultiallelicPool(combinedPoolLikelihoods, genotypeLikelihoods.get(p),
combinedPloidy, ploidyPerPool, numAlleles, log10AlleleFrequencyPriors);
combinedPloidy = ploidyPerPool + combinedPloidy; // total number of chromosomes in combinedLikelihoods
if (p < genotypeCount - 1) getStateTracker().reset(); // TODO -- why is this here? It makes it hard to track the n evaluation
}
}
}

View File

@ -82,8 +82,8 @@ final class StateTracker {
* maxAltAlleles in length, and so only the first getAllelesUsedInGenotyping.size() - 1 values
* are meaningful.
*/
private final int[] alleleCountsOfMLE;
private final int[] alleleCountsOfMAP;
private int[] alleleCountsOfMLE;
private int[] alleleCountsOfMAP;
/**
* A vector of log10 likelihood values seen, for future summation. When the size of the
@ -227,6 +227,31 @@ final class StateTracker {
//
// --------------------------------------------------------------------------------
/**
* Reset the data in this results object, so that it can be used in a subsequent AF calculation
*
* Resetting of the data is done by the calculation model itself, so shouldn't be done by callers any longer
*
* @param ensureAltAlleleCapacity indicate the minimum number of alt-alleles that should be supported by the
* tracker.
*/
protected void reset(final int ensureAltAlleleCapacity) {
log10MLE = log10MAP = log10LikelihoodOfAFzero = VALUE_NOT_CALCULATED;
log10LikelihoodsForAFGt0CacheIndex = 0;
log10LikelihoodsForAFGt0Sum = null;
allelesUsedInGenotyping = null;
nEvaluations = 0;
if (alleleCountsOfMAP.length < ensureAltAlleleCapacity) {
final int newCapacity = Math.max(ensureAltAlleleCapacity,alleleCountsOfMAP.length << 1);
alleleCountsOfMAP = new int[newCapacity];
alleleCountsOfMLE = new int[newCapacity];
} else {
Arrays.fill(alleleCountsOfMLE, 0);
Arrays.fill(alleleCountsOfMAP, 0);
}
Arrays.fill(log10LikelihoodsForAFGt0, Double.POSITIVE_INFINITY);
}
/**
* Reset the data in this results object, so that it can be used in a subsequent AF calculation
*
@ -243,6 +268,7 @@ final class StateTracker {
Arrays.fill(log10LikelihoodsForAFGt0, Double.POSITIVE_INFINITY);
}
/**
* Tell this result we used one more evaluation cycle
*/

View File

@ -68,8 +68,12 @@ import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
import org.broadinstitute.gatk.utils.*;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.QualityUtils;
import org.broadinstitute.gatk.utils.activeregion.ActiveRegion;
import org.broadinstitute.gatk.utils.activeregion.ActiveRegionReadState;
import org.broadinstitute.gatk.utils.activeregion.ActivityProfileState;
@ -623,6 +627,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
public void initialize() {
super.initialize();
if (dontGenotype && emitReferenceConfidence())
throw new UserException("You cannot request gVCF output and do not genotype at the same time");
@ -702,9 +708,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
headerInfo.add(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_GT_KEY, 1, VCFHeaderLineType.String, "Physical phasing haplotype information, describing how the alternate alleles are phased in relation to one another"));
}
if (SCAC.genotypeArgs.samplePloidy != HomoSapiensConstants.DEFAULT_PLOIDY) {
if (SCAC.emitReferenceConfidence != ReferenceConfidenceMode.NONE)
throw new UserException.BadArgumentValue("ERC", "For now ploidies different that 2 are not allow for GVCF or BP_RESOLUTION outputs");
if (SCAC.genotypeArgs.samplePloidy != HomoSapiensConstants.DEFAULT_PLOIDY || SCAC.requestedAlleleFrequencyCalculationModel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY) {
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"));
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample"));
}

View File

@ -51,6 +51,8 @@ import com.google.java.contract.Requires;
import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalc;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.Utils;
@ -103,6 +105,18 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
this(configuration,samples,genomeLocParser,false);
}
@Override
protected ThreadLocal<AFCalc> getAlleleFrequencyCalculatorThreadLocal() {
return new ThreadLocal<AFCalc>() {
@Override
public AFCalc initialValue() {
return AFCalcFactory.createAFCalc(configuration, numberOfGenomes,
configuration.emitReferenceConfidence != ReferenceConfidenceMode.NONE , logger);
}
};
}
/**
* Change the merge variant across haplotypes for this engine.
*

View File

@ -66,12 +66,12 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;
// this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "5cc1858896aca6683282f53054bb7a61"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "010a747f5c41ddb7889168e499eb40bb"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "d7dbc1c8e11a277e9db857eb766fd2c6"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "799752d88c4e15e19a953add764d2239"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "fa057b35d6fe9588c2653b6560d6e3c2"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "d10e8907594414890cbf80d282426812"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "3ea86317a90452eaf7c075f7a3aae77a"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "14e6011cc71541be5a6e70fecf6ce455"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "7e51552ac313840ae16c9b8187b01091"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "a4a89693cc1b5385b17c805a8b655cc0"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "1a6d0dc9afd3394ebec99480cb97eb7a"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "e462b6527ed9eb1c09261b453a7dadc4"});
return tests.toArray(new Object[][]{});
}
@ -103,12 +103,12 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;
// this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "6e157b6fdf4071fcb7da74f40146a611"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "354b84dbfaf55947aea40865e74ce66b"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "fc4b7e6528747cb20e0c92699a0787cb"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "6e0f5d82b77ea79a639d43b2db70e751"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "a3daf472f7ab16667e5f6dab1af392ff"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "af9230fa56752b732572ce956101a2be"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "efe4e8e4d28d419e40945a948fd5bdd0"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "8deed579c166443851d8fe7e3c521197"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "384e41d7c962e7a9c191c3646239b9f3"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "207ad9fdcc8d557010ec1bf2ae9f65dc"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "d5b914a994b8c2f2d70960bd444fcbba"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "34e76939b6aaa206fa42548a225b15c4"});
return tests.toArray(new Object[][]{});
}
@ -128,7 +128,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
/**
* Example testng test using MyDataProvider
*/
@Test(dataProvider = "MyDataProviderHaploid", enabled=false)
@Test(dataProvider = "MyDataProviderHaploid", enabled=true)
public void testHCWithGVCFHaploid(final String bam, final ReferenceConfidenceMode mode, final String intervals, final String md5) {
final String commandLine = String.format("-T HaplotypeCaller -ploidy 1 --disableDithering --pcr_indel_model NONE -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
@ -140,7 +140,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
/**
* Example testng test using MyDataProvider
*/
@Test(dataProvider = "MyDataProviderTetraploid", enabled=false)
@Test(dataProvider = "MyDataProviderTetraploid", enabled=true)
public void testHCWithGVCFTetraploid(final String bam, final ReferenceConfidenceMode mode, final String intervals, final String md5) {
final String commandLine = String.format("-T HaplotypeCaller -ploidy 4 --disableDithering --pcr_indel_model NONE -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
@ -219,20 +219,26 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
executeTest("testNoCallGVCFMissingPLsBugFix", spec);
}
@Test(enabled=false)
/**
* Checking that the bug's Exception is no longer been thrown (thus no md5).
*/
@Test(enabled=true)
public void testGeneralPloidyArrayIndexBug1Fix() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 1 -maxAltAlleles 2 -isr INTERSECTION -L 1:23696115-23696189",
b37KGReference, GENERAL_PLOIDY_BUGFIX1_BAM, GENERAL_PLOIDY_BUGFIX1_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("7c263d77bf831551366c6e36233b46ce"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(""));
spec.disableShadowBCF();
executeTest(" testGeneralPloidyArrayIndexBug1Fix", spec);
}
@Test(enabled=false)
/**
* Checking that the bug's Exception is no longer been thrown (thus no md5).
*/
@Test(enabled=true)
public void testGeneralPloidyArrayIndexBug2Fix() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 2 -maxAltAlleles 2 -A DepthPerSampleHC -A StrandBiasBySample -L 1:38052860-38052937",
b37KGReference, GENERAL_PLOIDY_BUGFIX2_BAM, GENERAL_PLOIDY_BUGFIX2_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("7c263d77bf831551366c6e36233b46ce"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(""));
spec.disableShadowBCF();
executeTest(" testGeneralPloidyArrayIndexBug2Fix", spec);
}