Fixed the bug first by indicating the actual possible number of alternatives alleles considering the extra <NON_REF> and second by resizing the StateTracker capacity when invoked by GeneralPloidyExactAFCalc deep within its implementation of computeLog10PNonRef which is ultimatelly what get rids of the exception.
Story: https://www.pivotaltracker.com/story/show/74471252
This commit is contained in:
parent
78c2da1fef
commit
d31c5536aa
|
|
@ -101,13 +101,17 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
protected final GenomeLocParser genomeLocParser;
|
||||
|
||||
// the model used for calculating p(non-ref)
|
||||
protected ThreadLocal<AFCalc> afcm = new ThreadLocal<AFCalc>() {
|
||||
protected ThreadLocal<AFCalc> afcm = getAlleleFrequencyCalculatorThreadLocal();
|
||||
|
||||
@Override
|
||||
public AFCalc initialValue() {
|
||||
return AFCalcFactory.createAFCalc(configuration, numberOfGenomes, logger);
|
||||
}
|
||||
};
|
||||
protected ThreadLocal<AFCalc> getAlleleFrequencyCalculatorThreadLocal() {
|
||||
return new ThreadLocal<AFCalc>() {
|
||||
|
||||
@Override
|
||||
public AFCalc initialValue() {
|
||||
return AFCalcFactory.createAFCalc(configuration, numberOfGenomes, false, logger);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -151,7 +151,7 @@ public class AFCalcFactory {
|
|||
* @return an initialized AFCalc
|
||||
*/
|
||||
public static AFCalc createAFCalc(final StandardCallerArgumentCollection UAC,
|
||||
final int nSamples,
|
||||
final int nSamples, final boolean emitConfModel,
|
||||
final Logger logger) {
|
||||
final Calculation afCalculationModel = Calculation.getBestModel(UAC.genotypeArgs.samplePloidy,UAC.genotypeArgs.MAX_ALTERNATE_ALLELES,
|
||||
UAC.requestedAlleleFrequencyCalculationModel);
|
||||
|
|
|
|||
|
|
@ -163,18 +163,20 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
final ExactACset set = new ExactACset(1, new ExactACcounts(zeroCounts));
|
||||
set.getLog10Likelihoods()[0] = 0.0;
|
||||
|
||||
final int genotypeCount = genotypeLikelihoods.size();
|
||||
combinedPoolLikelihoods.add(set);
|
||||
|
||||
if ( genotypeLikelihoods.size() <= 1 ) {
|
||||
getStateTracker().reset(numAlleles - 1);
|
||||
if ( genotypeCount <= 1 ) {
|
||||
// no meaningful GLs at all, just set the tracker to non poly values
|
||||
getStateTracker().reset(); // just mimic-ing call below
|
||||
// just mimic-ing call below
|
||||
getStateTracker().setLog10LikelihoodOfAFzero(0.0);
|
||||
} else {
|
||||
for (int p=1; p<genotypeLikelihoods.size(); p++) {
|
||||
getStateTracker().reset(); // TODO -- why is this here? It makes it hard to track the n evaluation
|
||||
for (int p=1; p<genotypeCount; p++) {
|
||||
combinedPoolLikelihoods = fastCombineMultiallelicPool(combinedPoolLikelihoods, genotypeLikelihoods.get(p),
|
||||
combinedPloidy, ploidyPerPool, numAlleles, log10AlleleFrequencyPriors);
|
||||
combinedPloidy = ploidyPerPool + combinedPloidy; // total number of chromosomes in combinedLikelihoods
|
||||
if (p < genotypeCount - 1) getStateTracker().reset(); // TODO -- why is this here? It makes it hard to track the n evaluation
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -82,8 +82,8 @@ final class StateTracker {
|
|||
* maxAltAlleles in length, and so only the first getAllelesUsedInGenotyping.size() - 1 values
|
||||
* are meaningful.
|
||||
*/
|
||||
private final int[] alleleCountsOfMLE;
|
||||
private final int[] alleleCountsOfMAP;
|
||||
private int[] alleleCountsOfMLE;
|
||||
private int[] alleleCountsOfMAP;
|
||||
|
||||
/**
|
||||
* A vector of log10 likelihood values seen, for future summation. When the size of the
|
||||
|
|
@ -227,6 +227,31 @@ final class StateTracker {
|
|||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Reset the data in this results object, so that it can be used in a subsequent AF calculation
|
||||
*
|
||||
* Resetting of the data is done by the calculation model itself, so shouldn't be done by callers any longer
|
||||
*
|
||||
* @param ensureAltAlleleCapacity indicate the minimum number of alt-alleles that should be supported by the
|
||||
* tracker.
|
||||
*/
|
||||
protected void reset(final int ensureAltAlleleCapacity) {
|
||||
log10MLE = log10MAP = log10LikelihoodOfAFzero = VALUE_NOT_CALCULATED;
|
||||
log10LikelihoodsForAFGt0CacheIndex = 0;
|
||||
log10LikelihoodsForAFGt0Sum = null;
|
||||
allelesUsedInGenotyping = null;
|
||||
nEvaluations = 0;
|
||||
if (alleleCountsOfMAP.length < ensureAltAlleleCapacity) {
|
||||
final int newCapacity = Math.max(ensureAltAlleleCapacity,alleleCountsOfMAP.length << 1);
|
||||
alleleCountsOfMAP = new int[newCapacity];
|
||||
alleleCountsOfMLE = new int[newCapacity];
|
||||
} else {
|
||||
Arrays.fill(alleleCountsOfMLE, 0);
|
||||
Arrays.fill(alleleCountsOfMAP, 0);
|
||||
}
|
||||
Arrays.fill(log10LikelihoodsForAFGt0, Double.POSITIVE_INFINITY);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset the data in this results object, so that it can be used in a subsequent AF calculation
|
||||
*
|
||||
|
|
@ -243,6 +268,7 @@ final class StateTracker {
|
|||
Arrays.fill(log10LikelihoodsForAFGt0, Double.POSITIVE_INFINITY);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Tell this result we used one more evaluation cycle
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -68,8 +68,12 @@ import org.broadinstitute.gatk.engine.walkers.*;
|
|||
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory;
|
||||
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
|
||||
import org.broadinstitute.gatk.utils.*;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import org.broadinstitute.gatk.utils.QualityUtils;
|
||||
import org.broadinstitute.gatk.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.gatk.utils.activeregion.ActiveRegionReadState;
|
||||
import org.broadinstitute.gatk.utils.activeregion.ActivityProfileState;
|
||||
|
|
@ -623,6 +627,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
public void initialize() {
|
||||
super.initialize();
|
||||
|
||||
|
||||
|
||||
if (dontGenotype && emitReferenceConfidence())
|
||||
throw new UserException("You cannot request gVCF output and do not genotype at the same time");
|
||||
|
||||
|
|
@ -702,9 +708,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
headerInfo.add(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_GT_KEY, 1, VCFHeaderLineType.String, "Physical phasing haplotype information, describing how the alternate alleles are phased in relation to one another"));
|
||||
}
|
||||
|
||||
if (SCAC.genotypeArgs.samplePloidy != HomoSapiensConstants.DEFAULT_PLOIDY) {
|
||||
if (SCAC.emitReferenceConfidence != ReferenceConfidenceMode.NONE)
|
||||
throw new UserException.BadArgumentValue("ERC", "For now ploidies different that 2 are not allow for GVCF or BP_RESOLUTION outputs");
|
||||
if (SCAC.genotypeArgs.samplePloidy != HomoSapiensConstants.DEFAULT_PLOIDY || SCAC.requestedAlleleFrequencyCalculationModel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY) {
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"));
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample"));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,6 +51,8 @@ import com.google.java.contract.Requires;
|
|||
import htsjdk.variant.variantcontext.*;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalc;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
import org.broadinstitute.gatk.utils.Utils;
|
||||
|
|
@ -103,6 +105,18 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
|
|||
this(configuration,samples,genomeLocParser,false);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ThreadLocal<AFCalc> getAlleleFrequencyCalculatorThreadLocal() {
|
||||
return new ThreadLocal<AFCalc>() {
|
||||
|
||||
@Override
|
||||
public AFCalc initialValue() {
|
||||
return AFCalcFactory.createAFCalc(configuration, numberOfGenomes,
|
||||
configuration.emitReferenceConfidence != ReferenceConfidenceMode.NONE , logger);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the merge variant across haplotypes for this engine.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -66,12 +66,12 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;
|
||||
|
||||
// this functionality can be adapted to provide input data for whatever you might want in your data
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "5cc1858896aca6683282f53054bb7a61"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "010a747f5c41ddb7889168e499eb40bb"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "d7dbc1c8e11a277e9db857eb766fd2c6"});
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "799752d88c4e15e19a953add764d2239"});
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "fa057b35d6fe9588c2653b6560d6e3c2"});
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "d10e8907594414890cbf80d282426812"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "3ea86317a90452eaf7c075f7a3aae77a"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "14e6011cc71541be5a6e70fecf6ce455"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "7e51552ac313840ae16c9b8187b01091"});
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "a4a89693cc1b5385b17c805a8b655cc0"});
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "1a6d0dc9afd3394ebec99480cb97eb7a"});
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "e462b6527ed9eb1c09261b453a7dadc4"});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
|
@ -103,12 +103,12 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;
|
||||
|
||||
// this functionality can be adapted to provide input data for whatever you might want in your data
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "6e157b6fdf4071fcb7da74f40146a611"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "354b84dbfaf55947aea40865e74ce66b"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "fc4b7e6528747cb20e0c92699a0787cb"});
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "6e0f5d82b77ea79a639d43b2db70e751"});
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "a3daf472f7ab16667e5f6dab1af392ff"});
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "af9230fa56752b732572ce956101a2be"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "efe4e8e4d28d419e40945a948fd5bdd0"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "8deed579c166443851d8fe7e3c521197"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "384e41d7c962e7a9c191c3646239b9f3"});
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "207ad9fdcc8d557010ec1bf2ae9f65dc"});
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "d5b914a994b8c2f2d70960bd444fcbba"});
|
||||
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "34e76939b6aaa206fa42548a225b15c4"});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
|
@ -128,7 +128,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
/**
|
||||
* Example testng test using MyDataProvider
|
||||
*/
|
||||
@Test(dataProvider = "MyDataProviderHaploid", enabled=false)
|
||||
@Test(dataProvider = "MyDataProviderHaploid", enabled=true)
|
||||
public void testHCWithGVCFHaploid(final String bam, final ReferenceConfidenceMode mode, final String intervals, final String md5) {
|
||||
final String commandLine = String.format("-T HaplotypeCaller -ploidy 1 --disableDithering --pcr_indel_model NONE -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
|
||||
b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
|
|
@ -140,7 +140,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
/**
|
||||
* Example testng test using MyDataProvider
|
||||
*/
|
||||
@Test(dataProvider = "MyDataProviderTetraploid", enabled=false)
|
||||
@Test(dataProvider = "MyDataProviderTetraploid", enabled=true)
|
||||
public void testHCWithGVCFTetraploid(final String bam, final ReferenceConfidenceMode mode, final String intervals, final String md5) {
|
||||
final String commandLine = String.format("-T HaplotypeCaller -ploidy 4 --disableDithering --pcr_indel_model NONE -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
|
||||
b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
|
|
@ -219,20 +219,26 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
executeTest("testNoCallGVCFMissingPLsBugFix", spec);
|
||||
}
|
||||
|
||||
@Test(enabled=false)
|
||||
/**
|
||||
* Checking that the bug's Exception is no longer been thrown (thus no md5).
|
||||
*/
|
||||
@Test(enabled=true)
|
||||
public void testGeneralPloidyArrayIndexBug1Fix() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 1 -maxAltAlleles 2 -isr INTERSECTION -L 1:23696115-23696189",
|
||||
b37KGReference, GENERAL_PLOIDY_BUGFIX1_BAM, GENERAL_PLOIDY_BUGFIX1_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("7c263d77bf831551366c6e36233b46ce"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(""));
|
||||
spec.disableShadowBCF();
|
||||
executeTest(" testGeneralPloidyArrayIndexBug1Fix", spec);
|
||||
}
|
||||
|
||||
@Test(enabled=false)
|
||||
/**
|
||||
* Checking that the bug's Exception is no longer been thrown (thus no md5).
|
||||
*/
|
||||
@Test(enabled=true)
|
||||
public void testGeneralPloidyArrayIndexBug2Fix() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 2 -maxAltAlleles 2 -A DepthPerSampleHC -A StrandBiasBySample -L 1:38052860-38052937",
|
||||
b37KGReference, GENERAL_PLOIDY_BUGFIX2_BAM, GENERAL_PLOIDY_BUGFIX2_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("7c263d77bf831551366c6e36233b46ce"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(""));
|
||||
spec.disableShadowBCF();
|
||||
executeTest(" testGeneralPloidyArrayIndexBug2Fix", spec);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue