GenotypingEngines and walkers now use AFCalc(ulator) providers rathern than instanciate their own (fixed) calculators directly.

Changes:
-------

* GenotypingEngine uses now a AFCalc provider instead of
  its own thread-local with one-time initialized and fixed
  AF calculator.

* All walkers that use a GenotypingEngine now are passing
  the appropiate AF calculator provider. For now most
  just use a fix calculator (FixedAFCalculatorProvider)
  except GenotypeGVCFs as this one now can cope with
  mixture of ploidies failing-over to a general-ploidy
  calculator when the preferred implementation is not
  capable to handle a site's analysis.
This commit is contained in:
Valentin Ruano-Rubio 2014-09-10 13:37:21 -04:00
parent 935bd1394b
commit 3cdeab6e9e
15 changed files with 153 additions and 125 deletions

View File

@ -60,6 +60,7 @@ import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import java.util.*;
@ -286,8 +287,8 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
builder.attributes(attributes);
// create the genotypes; no-call everyone for now
final GenotypesContext genotypes = GenotypesContext.create();
final List<Allele> noCall = new ArrayList<Allele>();
noCall.add(Allele.NO_CALL);
final int ploidy = UAC.genotypeArgs.samplePloidy;
final List<Allele> noCall = GATKVariantContextUtils.noCallAlleles(ploidy);
for ( PoolGenotypeData sampleData : GLs ) {
// extract from multidimensional array

View File

@ -60,8 +60,8 @@ import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalc;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcResult;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.MathUtils;
@ -81,8 +81,11 @@ import java.util.*;
public abstract class GenotypingEngine<Config extends StandardCallerArgumentCollection> {
public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA";
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
protected final AFCalculatorProvider afCalculatorProvider ;
protected Logger logger;
protected final Config configuration;
@ -100,20 +103,6 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
protected final GenomeLocParser genomeLocParser;
// the model used for calculating p(non-ref)
protected ThreadLocal<AFCalc> afcm = getAlleleFrequencyCalculatorThreadLocal();
protected ThreadLocal<AFCalc> getAlleleFrequencyCalculatorThreadLocal() {
return new ThreadLocal<AFCalc>() {
@Override
public AFCalc initialValue() {
return AFCalcFactory.createAFCalc(configuration, numberOfGenomes, false, logger);
}
};
}
/**
* Construct a new genotyper engine, on a specific subset of samples.
*
@ -124,10 +113,17 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
*
* @throws IllegalArgumentException if any of {@code samples}, {@code configuration} or {@code genomeLocParser} is {@code null}.
*/
protected GenotypingEngine(final Config configuration,final SampleList samples, final GenomeLocParser genomeLocParser) {
protected GenotypingEngine(final Config configuration, final SampleList samples,
final GenomeLocParser genomeLocParser, final AFCalculatorProvider afCalculatorProvider) {
if (configuration == null)
throw new IllegalArgumentException("the configuration cannot be null");
if (samples == null)
throw new IllegalArgumentException("the sample list provided cannot be null");
if (afCalculatorProvider == null)
throw new IllegalArgumentException("the AF calculator provider cannot be null");
this.afCalculatorProvider = afCalculatorProvider;
this.configuration = configuration;
logger = Logger.getLogger(getClass());
this.samples = samples;
@ -220,7 +216,10 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
if (hasTooManyAlternativeAlleles(vc) || vc.getNSamples() == 0)
return emptyCallContext(tracker,refContext,rawContext);
final AFCalcResult AFresult = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model));
final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
final AFCalc afCalculator = afCalculatorProvider.getInstance(vc,defaultPloidy,maxAltAlleles);
final AFCalcResult AFresult = afCalculator.getLog10PNonRef(vc, defaultPloidy,maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model));
final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult);
@ -260,7 +259,8 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
builder.filter(LOW_QUAL_FILTER_NAME);
// create the genotypes
final GenotypesContext genotypes = afcm.get().subsetAlleles(vc, outputAlleles, true,configuration.genotypeArgs.samplePloidy);
final GenotypesContext genotypes = afCalculator.subsetAlleles(vc, defaultPloidy, outputAlleles, true);
builder.genotypes(genotypes);
// *** note that calculating strand bias involves overwriting data structures, so we do that last

View File

@ -60,6 +60,7 @@ import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import java.util.*;
@ -130,8 +131,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
// create the genotypes; no-call everyone for now
GenotypesContext genotypes = GenotypesContext.create();
final List<Allele> noCall = new ArrayList<Allele>();
noCall.add(Allele.NO_CALL);
final int ploidy = UAC.genotypeArgs.samplePloidy;
final List<Allele> noCall = GATKVariantContextUtils.noCallAlleles(ploidy);
// For each sample, get genotype likelihoods based on pileup
// compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.
@ -148,6 +149,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
final GenotypeBuilder b = new GenotypeBuilder(sample.getKey());
final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap.get(sample.getKey()), UAC.getSampleContamination().get(sample.getKey()));
b.PL(genotypeLikelihoods);
b.alleles(noCall);
b.DP(getFilteredDepth(pileup));
genotypes.add(b.make());

View File

@ -64,6 +64,7 @@ import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileupImpl;
import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import java.util.*;
@ -180,7 +181,8 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
// create the genotypes; no-call everyone for now
final GenotypesContext genotypes = GenotypesContext.create();
final int ploidy = UAC.genotypeArgs.samplePloidy;
final List<Allele> noCall = GATKVariantContextUtils.noCallAlleles(ploidy);
for ( SampleGenotypeData sampleData : GLs ) {
final double[] allLikelihoods = sampleData.GL.getLikelihoods();
final double[] myLikelihoods = new double[numLikelihoods];
@ -193,6 +195,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
final double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(myLikelihoods, false, true);
gb.PL(genotypeLikelihoods);
gb.DP(sampleData.depth);
gb.alleles(noCall);
if (UAC.annotateAllSitesWithPLs)
gb.attribute(UnifiedGenotypingEngine.PL_FOR_ALL_SNP_ALLELES_KEY,GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(allLikelihoods, false, true)));
genotypes.add(gb.make());

View File

@ -46,10 +46,12 @@
package org.broadinstitute.gatk.tools.walkers.genotyper;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.utils.commandline.*;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.*;
import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
@ -59,18 +61,18 @@ import org.broadinstitute.gatk.engine.filters.BadMateFilter;
import org.broadinstitute.gatk.engine.filters.MappingQualityUnavailableFilter;
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider;
import org.broadinstitute.gatk.utils.SampleUtils;
import org.broadinstitute.gatk.utils.baq.BAQ;
import org.broadinstitute.gatk.utils.help.HelpConstants;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.vcf.*;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.gatk.utils.help.HelpConstants;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import java.io.PrintStream;
import java.util.*;
@ -284,7 +286,9 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
genotypingEngine = new UnifiedGenotypingEngine(UAC, samples, toolkit.getGenomeLocParser(), toolkit.getArguments().BAQMode);
final AFCalculatorProvider afCalcAFCalculatorProvider = FixedAFCalculatorProvider.createThreadSafeProvider(getToolkit(),UAC,logger);
genotypingEngine = new UnifiedGenotypingEngine(UAC, samples, toolkit.getGenomeLocParser(), afCalcAFCalculatorProvider, toolkit.getArguments().BAQMode);
genotypingEngine.setVerboseWriter(verboseWriter);
genotypingEngine.setAnnotationEngine(annotationEngine);
@ -308,6 +312,8 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
writer.writeHeader(new VCFHeader(headerInfo, samplesForHeader));
}
public static Set<VCFHeaderLine> getHeaderInfo(final UnifiedArgumentCollection UAC,
final VariantAnnotatorEngine annotationEngine,
final DbsnpArgumentCollection dbsnp) {

View File

@ -55,6 +55,7 @@ import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcResult;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
import org.broadinstitute.gatk.utils.BaseUtils;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.baq.BAQ;
@ -104,9 +105,9 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
*
* @throws NullPointerException if either {@code configuration} or {@code toolkit} is {@code null}.
*/
public UnifiedGenotypingEngine(final UnifiedArgumentCollection configuration,
public UnifiedGenotypingEngine(final UnifiedArgumentCollection configuration, final AFCalculatorProvider afCalculatorProvider,
final GenomeAnalysisEngine toolkit) {
this(configuration,toolkit.getSampleList(),toolkit.getGenomeLocParser(),toolkit.getArguments().BAQMode);
this(configuration,toolkit.getSampleList(),toolkit.getGenomeLocParser(),afCalculatorProvider,toolkit.getArguments().BAQMode);
}
@ -123,10 +124,10 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
* @throws IllegalArgumentException if {@code baqCalculationMode} is {@code null}.
*/
public UnifiedGenotypingEngine(final UnifiedArgumentCollection configuration,
final SampleList samples, final GenomeLocParser genomeLocParser,
final SampleList samples, final GenomeLocParser genomeLocParser, final AFCalculatorProvider afCalculatorProvider,
final BAQ.CalculationMode baqCalculationMode) {
super(configuration,samples,genomeLocParser);
super(configuration,samples,genomeLocParser,afCalculatorProvider);
if (baqCalculationMode == null)
throw new IllegalArgumentException("the BAQ calculation mode cannot be null");
@ -416,7 +417,9 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts,orientation,
allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
return afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model));
final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
return afCalculatorProvider.getInstance(vc, defaultPloidy, maxAltAlleles).getLog10PNonRef(vc, defaultPloidy, maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model));
}
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(final ReferenceContext refContext,
@ -480,7 +483,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
AFline.append('\t');
AFline.append(i).append('/').append(numberOfGenomes).append('\t');
AFline.append(String.format("%.2f\t", ((float) i) / numberOfGenomes));
AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i]));
AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(vc,configuration.genotypeArgs.samplePloidy,model)[i]));
verboseWriter.println(AFline.toString());
}

View File

@ -51,6 +51,7 @@ import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingEngine;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.Utils;
import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import java.util.ArrayList;
import java.util.Arrays;
@ -70,11 +71,11 @@ public class AFCalcTestBuilder {
final int nSamples;
final int numAltAlleles;
final AFCalcFactory.Calculation modelType;
final AFCalculatorImplementation modelType;
final PriorType priorType;
public AFCalcTestBuilder(final int nSamples, final int numAltAlleles,
final AFCalcFactory.Calculation modelType, final PriorType priorType) {
final AFCalculatorImplementation modelType, final PriorType priorType) {
this.nSamples = nSamples;
this.numAltAlleles = numAltAlleles;
this.modelType = modelType;
@ -100,7 +101,7 @@ public class AFCalcTestBuilder {
}
public AFCalc makeModel() {
return AFCalcFactory.createAFCalc(modelType, nSamples, getNumAltAlleles(), 2);
return AFCalcFactory.createCalculator(modelType, nSamples, getNumAltAlleles(), HomoSapiensConstants.DEFAULT_PLOIDY);
}
public double[] makePriors() {

View File

@ -68,6 +68,7 @@ import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
@ -96,6 +97,7 @@ import org.broadinstitute.gatk.utils.sam.AlignmentUtils;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.broadinstitute.gatk.utils.sam.ReadUtils;
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import java.io.FileNotFoundException;
@ -697,7 +699,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// Seems that at least with some test data we can lose genuine haploid variation if we use
// UGs engine with ploidy == 1
simpleUAC.genotypeArgs.samplePloidy = Math.max(2,SCAC.genotypeArgs.samplePloidy);
activeRegionEvaluationGenotyperEngine = new UnifiedGenotypingEngine(simpleUAC, toolkit);
activeRegionEvaluationGenotyperEngine = new UnifiedGenotypingEngine(simpleUAC,
FixedAFCalculatorProvider.createThreadSafeProvider(getToolkit(),simpleUAC,logger), toolkit);
activeRegionEvaluationGenotyperEngine.setLogger(logger);
if( SCAC.CONTAMINATION_FRACTION_FILE != null )
@ -707,7 +711,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other.");
final GenomeLocParser genomeLocParser = toolkit.getGenomeLocParser();
genotypingEngine = new HaplotypeCallerGenotypingEngine( SCAC, samplesList, genomeLocParser, !doNotRunPhysicalPhasing);
genotypingEngine = new HaplotypeCallerGenotypingEngine( SCAC, samplesList, genomeLocParser, FixedAFCalculatorProvider.createThreadSafeProvider(getToolkit(),SCAC,logger), !doNotRunPhysicalPhasing);
// initialize the output VCF header
final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
@ -880,7 +885,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// if we don't have any data, just abort early
return new ActivityProfileState(ref.getLocus(), 0.0);
final List<Allele> noCall = Collections.singletonList(Allele.NO_CALL); // used to noCall all genotypes until the exact model is applied
final int ploidy = activeRegionEvaluationGenotyperEngine.getConfiguration().genotypeArgs.samplePloidy;
final List<Allele> noCall = GATKVariantContextUtils.noCallAlleles(ploidy); // used to noCall all genotypes until the exact model is applied
final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context);
final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size());
final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage();

View File

@ -51,8 +51,7 @@ import com.google.java.contract.Requires;
import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalc;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.Utils;
@ -71,7 +70,6 @@ import java.util.*;
*/
public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeCallerArgumentCollection> {
private final static List<Allele> NO_CALL = Collections.singletonList(Allele.NO_CALL);
private static final int ALLELE_EXTENSION = 2;
private static final String phase01 = "0|1";
private static final String phase10 = "1|0";
@ -91,8 +89,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
* @param genomeLocParser {@inheritDoc}
* @param doPhysicalPhasing whether to try physical phasing.
*/
public HaplotypeCallerGenotypingEngine(final HaplotypeCallerArgumentCollection configuration, final SampleList samples, final GenomeLocParser genomeLocParser, final boolean doPhysicalPhasing) {
super(configuration,samples,genomeLocParser);
public HaplotypeCallerGenotypingEngine(final HaplotypeCallerArgumentCollection configuration, final SampleList samples, final GenomeLocParser genomeLocParser, final AFCalculatorProvider afCalculatorProvider, final boolean doPhysicalPhasing) {
super(configuration,samples,genomeLocParser,afCalculatorProvider);
if (genomeLocParser == null)
throw new IllegalArgumentException("the genome location parser provided cannot be null");
this.doPhysicalPhasing= doPhysicalPhasing;
@ -100,25 +98,6 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
genotypingModel = new InfiniteRandomMatingPopulationModel();
}
/**
* {@inheritDoc}
*/
public HaplotypeCallerGenotypingEngine(final HaplotypeCallerArgumentCollection configuration, final SampleList samples, final GenomeLocParser genomeLocParser) {
this(configuration,samples,genomeLocParser,false);
}
@Override
protected ThreadLocal<AFCalc> getAlleleFrequencyCalculatorThreadLocal() {
return new ThreadLocal<AFCalc>() {
@Override
public AFCalc initialValue() {
return AFCalcFactory.createAFCalc(configuration, numberOfGenomes,
configuration.emitReferenceConfidence != ReferenceConfidenceMode.NONE , logger);
}
};
}
/**
* Change the merge variant across haplotypes for this engine.
*
@ -226,6 +205,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
// Walk along each position in the key set and create each event to be outputted
final Set<Haplotype> calledHaplotypes = new HashSet<>();
final List<VariantContext> returnCalls = new ArrayList<>();
final int ploidy = configuration.genotypeArgs.samplePloidy;
final List<Allele> noCallAlleles = GATKVariantContextUtils.noCallAlleles(ploidy);
for( final int loc : startPosKeySet ) {
if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { // genotyping an event inside this active region
@ -276,7 +257,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
if (emitReferenceConfidence)
readAlleleLikelihoods.addNonReferenceAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
final GenotypesContext genotypes = calculateGLsForThisEvent( readAlleleLikelihoods, mergedVC );
final GenotypesContext genotypes = calculateGLsForThisEvent( readAlleleLikelihoods, mergedVC, noCallAlleles );
final VariantContext call = calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel);
if( call != null ) {
@ -698,14 +679,14 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
*/
@Requires({"readLikelihoods!= null", "mergedVC != null"})
@Ensures("result != null")
private GenotypesContext calculateGLsForThisEvent( final ReadLikelihoods<Allele> readLikelihoods, final VariantContext mergedVC ) {
private GenotypesContext calculateGLsForThisEvent( final ReadLikelihoods<Allele> readLikelihoods, final VariantContext mergedVC, final List<Allele> noCallAlleles ) {
final List<Allele> vcAlleles = mergedVC.getAlleles();
final AlleleList<Allele> alleleList = readLikelihoods.alleleCount() == vcAlleles.size() ? readLikelihoods : new IndexedAlleleList<>(vcAlleles);
final GenotypingLikelihoods<Allele> likelihoods = genotypingModel.calculateLikelihoods(alleleList,new GenotypingData<>(ploidyModel,readLikelihoods));
final int sampleCount = samples.sampleCount();
final GenotypesContext result = GenotypesContext.create(sampleCount);
for (int s = 0; s < sampleCount; s++)
result.add(new GenotypeBuilder(samples.sampleAt(s)).alleles(NO_CALL).PL(likelihoods.sampleLikelihoods(s).getAsPLs()).make());
result.add(new GenotypeBuilder(samples.sampleAt(s)).alleles(noCallAlleles).PL(likelihoods.sampleLikelihoods(s).getAsPLs()).make());
return result;
}
@ -769,23 +750,6 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
return eventMapper;
}
@Ensures({"result.size() == haplotypeAllelesForSample.size()"})
protected static List<Allele> findEventAllelesInSample( final List<Allele> eventAlleles, final List<Allele> haplotypeAlleles, final List<Allele> haplotypeAllelesForSample, final List<List<Haplotype>> alleleMapper, final List<Haplotype> haplotypes ) {
if( haplotypeAllelesForSample.contains(Allele.NO_CALL) ) { return NO_CALL; }
final List<Allele> eventAllelesForSample = new ArrayList<>();
for( final Allele a : haplotypeAllelesForSample ) {
final Haplotype haplotype = haplotypes.get(haplotypeAlleles.indexOf(a));
for( int iii = 0; iii < alleleMapper.size(); iii++ ) {
final List<Haplotype> mappedHaplotypes = alleleMapper.get(iii);
if( mappedHaplotypes.contains(haplotype) ) {
eventAllelesForSample.add(eventAlleles.get(iii));
break;
}
}
}
return eventAllelesForSample;
}
@Deprecated
protected static Map<Integer,VariantContext> generateVCsFromAlignment( final Haplotype haplotype, final byte[] ref, final GenomeLoc refLoc, final String sourceNameToAdd ) {
return new EventMap(haplotype, ref, refLoc, sourceNameToAdd);

View File

@ -48,6 +48,7 @@ package org.broadinstitute.gatk.tools.walkers.validation;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
@ -352,12 +353,15 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
final GenomeAnalysisEngine toolkit = getToolkit();
uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
snpEngine = new UnifiedGenotypingEngine(uac,toolkit);
snpEngine = new UnifiedGenotypingEngine(uac,
FixedAFCalculatorProvider.createThreadSafeProvider(toolkit, uac, logger),toolkit);
// Adding the INDEL calling arguments for UG
UnifiedArgumentCollection uac_indel = uac.clone();
uac_indel.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL;
indelEngine = new UnifiedGenotypingEngine(uac_indel,toolkit);
indelEngine = new UnifiedGenotypingEngine(uac_indel,
FixedAFCalculatorProvider.createThreadSafeProvider(toolkit, uac, logger),toolkit);
// make sure we have callConf set to the threshold set by the UAC so we can use it later.
callConf = uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING;

View File

@ -46,18 +46,20 @@
package org.broadinstitute.gatk.tools.walkers.validation.validationsiteselector;
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalc;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcResult;
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import java.util.TreeSet;
public class GLBasedSampleSelector extends SampleSelector {
private static final int MAX_ALT_ALLELES = 4;
double[] flatPriors = null;
final double referenceLikelihood;
AFCalc AFCalculator;
AFCalc afCalculator;
public GLBasedSampleSelector(TreeSet<String> sm, double refLik) {
super(sm);
@ -78,9 +80,9 @@ public class GLBasedSampleSelector extends SampleSelector {
// do we want to apply a prior? maybe user-spec?
if ( flatPriors == null ) {
flatPriors = new double[1+2*samples.size()];
AFCalculator = AFCalcFactory.createAFCalc(samples.size(), 4, 2);
afCalculator = AFCalcFactory.createCalculator(samples.size(), MAX_ALT_ALLELES, HomoSapiensConstants.DEFAULT_PLOIDY);
}
final AFCalcResult result = AFCalculator.getLog10PNonRef(subContext, flatPriors);
final AFCalcResult result = afCalculator.getLog10PNonRef(subContext, HomoSapiensConstants.DEFAULT_PLOIDY, MAX_ALT_ALLELES, flatPriors);
// do we want to let this qual go up or down?
if ( result.getLog10LikelihoodOfAFEq0() < referenceLikelihood ) {
return true;

View File

@ -63,11 +63,11 @@ import org.broadinstitute.gatk.engine.walkers.Window;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.GeneralPloidyFailOverAFCalculatorProvider;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.SampleUtils;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
import org.broadinstitute.gatk.utils.help.HelpConstants;
import org.broadinstitute.gatk.utils.variant.GATKVCFUtils;
@ -166,7 +166,8 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
final Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(toolkit, variants);
final SampleList samples = new IndexedSampleList(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
// create the genotyping engine
genotypingEngine = new UnifiedGenotypingEngine(createUAC(), samples, toolkit.getGenomeLocParser(), toolkit.getArguments().BAQMode);
genotypingEngine = new UnifiedGenotypingEngine(createUAC(), samples, toolkit.getGenomeLocParser(), GeneralPloidyFailOverAFCalculatorProvider.createThreadSafeProvider(toolkit, genotypeArgs, logger),
toolkit.getArguments().BAQMode);
// create the annotation engine
annotationEngine = new VariantAnnotatorEngine(Arrays.asList("none"), annotationsToUse, Collections.<String>emptyList(), this, toolkit);
@ -182,6 +183,8 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
final Set<String> sampleNameSet = SampleListUtils.asSet(samples);
final VCFHeader vcfHeader = new VCFHeader(headerLines, sampleNameSet);
vcfWriter.writeHeader(vcfHeader);
logger.info("Notice that the -ploidy parameter is ignored in " + getClass().getSimpleName() + " tool as this is automatically determined by the input variant files");
}
public VariantContext map(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
@ -192,23 +195,9 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
final VariantContext combinedVC = ReferenceConfidenceVariantContextMerger.merge(tracker.getPrioritizedValue(variants, loc), loc, INCLUDE_NON_VARIANTS ? ref.getBase() : null, true);
if ( combinedVC == null )
return null;
checkPloidy(combinedVC);
return regenotypeVC(tracker, ref, combinedVC);
}
private void checkPloidy(final VariantContext combinedVC) {
final int requiredPloidy = genotypeArgs.samplePloidy;
for (final Genotype g : combinedVC.getGenotypes()) {
if (g.getPloidy() != requiredPloidy) {
throw new UserException.BadArgumentValue("ploidy",
"the input variant data contains calls with a different ploidy than the one specified (" + requiredPloidy
+ "). For example sample (" + g.getSampleName() + ") at (" + combinedVC.getChr() + ":" + combinedVC.getStart() + ")");
}
}
}
/**
* Re-genotype (and re-annotate) a combined genomic VC
*

View File

@ -50,6 +50,7 @@ import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.tools.walkers.genotyper.IndexedSampleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleListUtils;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider;
import org.broadinstitute.gatk.utils.commandline.ArgumentCollection;
import org.broadinstitute.gatk.utils.commandline.Output;
import org.broadinstitute.gatk.engine.CommandLineGATK;
@ -129,7 +130,9 @@ public class RegenotypeVariants extends RodWalker<Integer, Integer> implements T
final SampleList samples =
new IndexedSampleList(SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)));
final Set<String> sampleNameSet = SampleListUtils.asSet(samples);
UG_engine = new UnifiedGenotypingEngine(UAC, samples,toolkit.getGenomeLocParser(),toolkit.getArguments().BAQMode);
UG_engine = new UnifiedGenotypingEngine(UAC, samples,toolkit.getGenomeLocParser(),
FixedAFCalculatorProvider.createThreadSafeProvider(toolkit,UAC,logger),
toolkit.getArguments().BAQMode);
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(trackName)));

View File

@ -293,11 +293,12 @@ public class MathUtils {
public static double log10sumLog10(final double[] log10p, final int start, final int finish) {
if (start >= finish)
return Double.NEGATIVE_INFINITY;
final int maxElementIndex = MathUtils.maxElementIndex(log10p, start, finish);
double maxValue = log10p[maxElementIndex];
if(maxValue == Double.NEGATIVE_INFINITY) {
final double maxValue = log10p[maxElementIndex];
if(maxValue == Double.NEGATIVE_INFINITY)
return maxValue;
}
double sum = 1.0;
for (int i = start; i < finish; i++) {
double curVal = log10p[i];
@ -888,6 +889,7 @@ public class MathUtils {
if (start > endIndex) {
throw new IllegalArgumentException("Start cannot be after end.");
}
int maxI = start;
for (int i = (start+1); i < endIndex; i++) {
if (array[i] > array[maxI])

View File

@ -47,7 +47,14 @@ public class GATKVariantContextUtils {
public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
/**
* Diploid NO_CALL allele list...
*
* @deprecated you should use {@link #noCallAlleles(int)} instead. It indicates the presence of a hardcoded diploid assumption which is bad.
*/
@Deprecated
public final static List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
public final static String NON_REF_SYMBOLIC_ALLELE_NAME = "NON_REF";
public final static Allele NON_REF_SYMBOLIC_ALLELE = Allele.create("<"+NON_REF_SYMBOLIC_ALLELE_NAME+">", false); // represents any possible non-ref allele at this site
@ -143,6 +150,26 @@ public class GATKVariantContextUtils {
return ref;
}
/**
* Calculates the total ploidy of a variant context as the sum of all plodies across genotypes.
* @param vc the target variant context.
* @param defaultPloidy the default ploidy to be assume when there is no ploidy information for a genotype.
* @return never {@code null}.
*/
public static int totalPloidy(final VariantContext vc, final int defaultPloidy) {
if (vc == null)
throw new IllegalArgumentException("the vc provided cannot be null");
if (defaultPloidy < 0)
throw new IllegalArgumentException("the default ploidy must 0 or greater");
int result = 0;
for (final Genotype genotype : vc.getGenotypes()) {
final int declaredPloidy = genotype.getPloidy();
result += declaredPloidy <= 0 ? declaredPloidy : declaredPloidy;
}
return result;
}
public enum GenotypeMergeType {
/**
* Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD.
@ -1322,6 +1349,28 @@ public class GATKVariantContextUtils {
}
}
/**
* Cached NO_CALL immutable lists where the position ith contains the list with i elements.
*/
private static List<Allele>[] NOCALL_LISTS = new List[] {
Collections.emptyList(),
Collections.singletonList(Allele.NO_CALL),
Collections.nCopies(2,Allele.NO_CALL)
};
/**
* Synchronized code to ensure that {@link #NOCALL_LISTS} has enough entries beyod the requested ploidy
* @param capacity the requested ploidy.
*/
private static synchronized void ensureNoCallListsCapacity(final int capacity) {
final int currentCapacity = NOCALL_LISTS.length - 1;
if (currentCapacity >= capacity)
return;
NOCALL_LISTS = Arrays.copyOf(NOCALL_LISTS,Math.max(capacity,currentCapacity << 1) + 1);
for (int i = currentCapacity + 1; i < NOCALL_LISTS.length; i++)
NOCALL_LISTS[i] = Collections.nCopies(i,Allele.NO_CALL);
}
/**
* Returns a {@link Allele#NO_CALL NO_CALL} allele list provided the ploidy.
*
@ -1331,16 +1380,9 @@ public class GATKVariantContextUtils {
* might or might not be mutable.
*/
public static List<Allele> noCallAlleles(final int ploidy) {
if (ploidy <= 0)
return Collections.EMPTY_LIST;
else if (ploidy == 1)
return Collections.singletonList(Allele.NO_CALL);
else {
final List<Allele> result = new ArrayList<>(ploidy);
for (int i = 0; i < ploidy; i++)
result.add(Allele.NO_CALL);
return result;
}
if (NOCALL_LISTS.length <= ploidy)
ensureNoCallListsCapacity(ploidy);
return NOCALL_LISTS[ploidy];
}