GenotypingEngines and walkers now use AFCalc(ulator) providers rathern than instanciate their own (fixed) calculators directly.

Changes:
-------

* GenotypingEngine uses now a AFCalc provider instead of
  its own thread-local with one-time initialized and fixed
  AF calculator.

* All walkers that use a GenotypingEngine now are passing
  the appropiate AF calculator provider. For now most
  just use a fix calculator (FixedAFCalculatorProvider)
  except GenotypeGVCFs as this one now can cope with
  mixture of ploidies failing-over to a general-ploidy
  calculator when the preferred implementation is not
  capable to handle a site's analysis.
This commit is contained in:
Valentin Ruano-Rubio 2014-09-10 13:37:21 -04:00
parent 935bd1394b
commit 3cdeab6e9e
15 changed files with 153 additions and 125 deletions

View File

@ -60,6 +60,7 @@ import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import htsjdk.variant.variantcontext.*; import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import java.util.*; import java.util.*;
@ -286,8 +287,8 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
builder.attributes(attributes); builder.attributes(attributes);
// create the genotypes; no-call everyone for now // create the genotypes; no-call everyone for now
final GenotypesContext genotypes = GenotypesContext.create(); final GenotypesContext genotypes = GenotypesContext.create();
final List<Allele> noCall = new ArrayList<Allele>(); final int ploidy = UAC.genotypeArgs.samplePloidy;
noCall.add(Allele.NO_CALL); final List<Allele> noCall = GATKVariantContextUtils.noCallAlleles(ploidy);
for ( PoolGenotypeData sampleData : GLs ) { for ( PoolGenotypeData sampleData : GLs ) {
// extract from multidimensional array // extract from multidimensional array

View File

@ -60,8 +60,8 @@ import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalc; import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalc;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcResult; import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcResult;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.MathUtils; import org.broadinstitute.gatk.utils.MathUtils;
@ -81,8 +81,11 @@ import java.util.*;
public abstract class GenotypingEngine<Config extends StandardCallerArgumentCollection> { public abstract class GenotypingEngine<Config extends StandardCallerArgumentCollection> {
public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA"; public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA";
public static final String LOW_QUAL_FILTER_NAME = "LowQual"; public static final String LOW_QUAL_FILTER_NAME = "LowQual";
protected final AFCalculatorProvider afCalculatorProvider ;
protected Logger logger; protected Logger logger;
protected final Config configuration; protected final Config configuration;
@ -100,20 +103,6 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
protected final GenomeLocParser genomeLocParser; protected final GenomeLocParser genomeLocParser;
// the model used for calculating p(non-ref)
protected ThreadLocal<AFCalc> afcm = getAlleleFrequencyCalculatorThreadLocal();
protected ThreadLocal<AFCalc> getAlleleFrequencyCalculatorThreadLocal() {
return new ThreadLocal<AFCalc>() {
@Override
public AFCalc initialValue() {
return AFCalcFactory.createAFCalc(configuration, numberOfGenomes, false, logger);
}
};
}
/** /**
* Construct a new genotyper engine, on a specific subset of samples. * Construct a new genotyper engine, on a specific subset of samples.
* *
@ -124,10 +113,17 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
* *
* @throws IllegalArgumentException if any of {@code samples}, {@code configuration} or {@code genomeLocParser} is {@code null}. * @throws IllegalArgumentException if any of {@code samples}, {@code configuration} or {@code genomeLocParser} is {@code null}.
*/ */
protected GenotypingEngine(final Config configuration,final SampleList samples, final GenomeLocParser genomeLocParser) { protected GenotypingEngine(final Config configuration, final SampleList samples,
final GenomeLocParser genomeLocParser, final AFCalculatorProvider afCalculatorProvider) {
if (configuration == null) if (configuration == null)
throw new IllegalArgumentException("the configuration cannot be null"); throw new IllegalArgumentException("the configuration cannot be null");
if (samples == null)
throw new IllegalArgumentException("the sample list provided cannot be null");
if (afCalculatorProvider == null)
throw new IllegalArgumentException("the AF calculator provider cannot be null");
this.afCalculatorProvider = afCalculatorProvider;
this.configuration = configuration; this.configuration = configuration;
logger = Logger.getLogger(getClass()); logger = Logger.getLogger(getClass());
this.samples = samples; this.samples = samples;
@ -220,7 +216,10 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
if (hasTooManyAlternativeAlleles(vc) || vc.getNSamples() == 0) if (hasTooManyAlternativeAlleles(vc) || vc.getNSamples() == 0)
return emptyCallContext(tracker,refContext,rawContext); return emptyCallContext(tracker,refContext,rawContext);
final AFCalcResult AFresult = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model)); final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
final AFCalc afCalculator = afCalculatorProvider.getInstance(vc,defaultPloidy,maxAltAlleles);
final AFCalcResult AFresult = afCalculator.getLog10PNonRef(vc, defaultPloidy,maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model));
final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult); final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult);
@ -260,7 +259,8 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
builder.filter(LOW_QUAL_FILTER_NAME); builder.filter(LOW_QUAL_FILTER_NAME);
// create the genotypes // create the genotypes
final GenotypesContext genotypes = afcm.get().subsetAlleles(vc, outputAlleles, true,configuration.genotypeArgs.samplePloidy);
final GenotypesContext genotypes = afCalculator.subsetAlleles(vc, defaultPloidy, outputAlleles, true);
builder.genotypes(genotypes); builder.genotypes(genotypes);
// *** note that calculating strand bias involves overwriting data structures, so we do that last // *** note that calculating strand bias involves overwriting data structures, so we do that last

View File

@ -60,6 +60,7 @@ import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.pileup.PileupElement; import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import htsjdk.variant.variantcontext.*; import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import java.util.*; import java.util.*;
@ -130,8 +131,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
// create the genotypes; no-call everyone for now // create the genotypes; no-call everyone for now
GenotypesContext genotypes = GenotypesContext.create(); GenotypesContext genotypes = GenotypesContext.create();
final List<Allele> noCall = new ArrayList<Allele>(); final int ploidy = UAC.genotypeArgs.samplePloidy;
noCall.add(Allele.NO_CALL); final List<Allele> noCall = GATKVariantContextUtils.noCallAlleles(ploidy);
// For each sample, get genotype likelihoods based on pileup // For each sample, get genotype likelihoods based on pileup
// compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them. // compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.
@ -148,6 +149,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
final GenotypeBuilder b = new GenotypeBuilder(sample.getKey()); final GenotypeBuilder b = new GenotypeBuilder(sample.getKey());
final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap.get(sample.getKey()), UAC.getSampleContamination().get(sample.getKey())); final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap.get(sample.getKey()), UAC.getSampleContamination().get(sample.getKey()));
b.PL(genotypeLikelihoods); b.PL(genotypeLikelihoods);
b.alleles(noCall);
b.DP(getFilteredDepth(pileup)); b.DP(getFilteredDepth(pileup));
genotypes.add(b.make()); genotypes.add(b.make());

View File

@ -64,6 +64,7 @@ import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileupImpl;
import htsjdk.variant.variantcontext.*; import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import java.util.*; import java.util.*;
@ -180,7 +181,8 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
// create the genotypes; no-call everyone for now // create the genotypes; no-call everyone for now
final GenotypesContext genotypes = GenotypesContext.create(); final GenotypesContext genotypes = GenotypesContext.create();
final int ploidy = UAC.genotypeArgs.samplePloidy;
final List<Allele> noCall = GATKVariantContextUtils.noCallAlleles(ploidy);
for ( SampleGenotypeData sampleData : GLs ) { for ( SampleGenotypeData sampleData : GLs ) {
final double[] allLikelihoods = sampleData.GL.getLikelihoods(); final double[] allLikelihoods = sampleData.GL.getLikelihoods();
final double[] myLikelihoods = new double[numLikelihoods]; final double[] myLikelihoods = new double[numLikelihoods];
@ -193,6 +195,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
final double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(myLikelihoods, false, true); final double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(myLikelihoods, false, true);
gb.PL(genotypeLikelihoods); gb.PL(genotypeLikelihoods);
gb.DP(sampleData.depth); gb.DP(sampleData.depth);
gb.alleles(noCall);
if (UAC.annotateAllSitesWithPLs) if (UAC.annotateAllSitesWithPLs)
gb.attribute(UnifiedGenotypingEngine.PL_FOR_ALL_SNP_ALLELES_KEY,GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(allLikelihoods, false, true))); gb.attribute(UnifiedGenotypingEngine.PL_FOR_ALL_SNP_ALLELES_KEY,GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(allLikelihoods, false, true)));
genotypes.add(gb.make()); genotypes.add(gb.make());

View File

@ -46,10 +46,12 @@
package org.broadinstitute.gatk.tools.walkers.genotyper; package org.broadinstitute.gatk.tools.walkers.genotyper;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import org.broadinstitute.gatk.engine.walkers.*; import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.gatk.utils.commandline.*; import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.*;
import org.broadinstitute.gatk.engine.CommandLineGATK; import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection; import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext; import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext; import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
@ -59,18 +61,18 @@ import org.broadinstitute.gatk.engine.filters.BadMateFilter;
import org.broadinstitute.gatk.engine.filters.MappingQualityUnavailableFilter; import org.broadinstitute.gatk.engine.filters.MappingQualityUnavailableFilter;
import org.broadinstitute.gatk.engine.iterators.ReadTransformer; import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider;
import org.broadinstitute.gatk.utils.SampleUtils; import org.broadinstitute.gatk.utils.SampleUtils;
import org.broadinstitute.gatk.utils.baq.BAQ; import org.broadinstitute.gatk.utils.baq.BAQ;
import org.broadinstitute.gatk.utils.help.HelpConstants; import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.vcf.*;
import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
import htsjdk.variant.variantcontext.GenotypeLikelihoods; import org.broadinstitute.gatk.utils.help.HelpConstants;
import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.*; import java.util.*;
@ -284,7 +286,9 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP"); verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
genotypingEngine = new UnifiedGenotypingEngine(UAC, samples, toolkit.getGenomeLocParser(), toolkit.getArguments().BAQMode);
final AFCalculatorProvider afCalcAFCalculatorProvider = FixedAFCalculatorProvider.createThreadSafeProvider(getToolkit(),UAC,logger);
genotypingEngine = new UnifiedGenotypingEngine(UAC, samples, toolkit.getGenomeLocParser(), afCalcAFCalculatorProvider, toolkit.getArguments().BAQMode);
genotypingEngine.setVerboseWriter(verboseWriter); genotypingEngine.setVerboseWriter(verboseWriter);
genotypingEngine.setAnnotationEngine(annotationEngine); genotypingEngine.setAnnotationEngine(annotationEngine);
@ -308,6 +312,8 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
writer.writeHeader(new VCFHeader(headerInfo, samplesForHeader)); writer.writeHeader(new VCFHeader(headerInfo, samplesForHeader));
} }
public static Set<VCFHeaderLine> getHeaderInfo(final UnifiedArgumentCollection UAC, public static Set<VCFHeaderLine> getHeaderInfo(final UnifiedArgumentCollection UAC,
final VariantAnnotatorEngine annotationEngine, final VariantAnnotatorEngine annotationEngine,
final DbsnpArgumentCollection dbsnp) { final DbsnpArgumentCollection dbsnp) {

View File

@ -55,6 +55,7 @@ import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext; import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcResult; import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcResult;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
import org.broadinstitute.gatk.utils.BaseUtils; import org.broadinstitute.gatk.utils.BaseUtils;
import org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.baq.BAQ; import org.broadinstitute.gatk.utils.baq.BAQ;
@ -104,9 +105,9 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
* *
* @throws NullPointerException if either {@code configuration} or {@code toolkit} is {@code null}. * @throws NullPointerException if either {@code configuration} or {@code toolkit} is {@code null}.
*/ */
public UnifiedGenotypingEngine(final UnifiedArgumentCollection configuration, public UnifiedGenotypingEngine(final UnifiedArgumentCollection configuration, final AFCalculatorProvider afCalculatorProvider,
final GenomeAnalysisEngine toolkit) { final GenomeAnalysisEngine toolkit) {
this(configuration,toolkit.getSampleList(),toolkit.getGenomeLocParser(),toolkit.getArguments().BAQMode); this(configuration,toolkit.getSampleList(),toolkit.getGenomeLocParser(),afCalculatorProvider,toolkit.getArguments().BAQMode);
} }
@ -123,10 +124,10 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
* @throws IllegalArgumentException if {@code baqCalculationMode} is {@code null}. * @throws IllegalArgumentException if {@code baqCalculationMode} is {@code null}.
*/ */
public UnifiedGenotypingEngine(final UnifiedArgumentCollection configuration, public UnifiedGenotypingEngine(final UnifiedArgumentCollection configuration,
final SampleList samples, final GenomeLocParser genomeLocParser, final SampleList samples, final GenomeLocParser genomeLocParser, final AFCalculatorProvider afCalculatorProvider,
final BAQ.CalculationMode baqCalculationMode) { final BAQ.CalculationMode baqCalculationMode) {
super(configuration,samples,genomeLocParser); super(configuration,samples,genomeLocParser,afCalculatorProvider);
if (baqCalculationMode == null) if (baqCalculationMode == null)
throw new IllegalArgumentException("the BAQ calculation mode cannot be null"); throw new IllegalArgumentException("the BAQ calculation mode cannot be null");
@ -416,7 +417,9 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) { final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts,orientation, final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts,orientation,
allAllelesToUse, false, model, perReadAlleleLikelihoodMap); allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
return afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model)); final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
return afCalculatorProvider.getInstance(vc, defaultPloidy, maxAltAlleles).getLog10PNonRef(vc, defaultPloidy, maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model));
} }
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(final ReferenceContext refContext, private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(final ReferenceContext refContext,
@ -480,7 +483,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
AFline.append('\t'); AFline.append('\t');
AFline.append(i).append('/').append(numberOfGenomes).append('\t'); AFline.append(i).append('/').append(numberOfGenomes).append('\t');
AFline.append(String.format("%.2f\t", ((float) i) / numberOfGenomes)); AFline.append(String.format("%.2f\t", ((float) i) / numberOfGenomes));
AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i])); AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(vc,configuration.genotypeArgs.samplePloidy,model)[i]));
verboseWriter.println(AFline.toString()); verboseWriter.println(AFline.toString());
} }

View File

@ -51,6 +51,7 @@ import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingEngine;
import org.broadinstitute.gatk.utils.MathUtils; import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.Utils; import org.broadinstitute.gatk.utils.Utils;
import htsjdk.variant.variantcontext.*; import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@ -70,11 +71,11 @@ public class AFCalcTestBuilder {
final int nSamples; final int nSamples;
final int numAltAlleles; final int numAltAlleles;
final AFCalcFactory.Calculation modelType; final AFCalculatorImplementation modelType;
final PriorType priorType; final PriorType priorType;
public AFCalcTestBuilder(final int nSamples, final int numAltAlleles, public AFCalcTestBuilder(final int nSamples, final int numAltAlleles,
final AFCalcFactory.Calculation modelType, final PriorType priorType) { final AFCalculatorImplementation modelType, final PriorType priorType) {
this.nSamples = nSamples; this.nSamples = nSamples;
this.numAltAlleles = numAltAlleles; this.numAltAlleles = numAltAlleles;
this.modelType = modelType; this.modelType = modelType;
@ -100,7 +101,7 @@ public class AFCalcTestBuilder {
} }
public AFCalc makeModel() { public AFCalc makeModel() {
return AFCalcFactory.createAFCalc(modelType, nSamples, getNumAltAlleles(), 2); return AFCalcFactory.createCalculator(modelType, nSamples, getNumAltAlleles(), HomoSapiensConstants.DEFAULT_PLOIDY);
} }
public double[] makePriors() { public double[] makePriors() {

View File

@ -68,6 +68,7 @@ import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.genotyper.*; import org.broadinstitute.gatk.tools.walkers.genotyper.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler; import org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.utils.GenomeLocParser;
@ -96,6 +97,7 @@ import org.broadinstitute.gatk.utils.sam.AlignmentUtils;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.broadinstitute.gatk.utils.sam.ReadUtils; import org.broadinstitute.gatk.utils.sam.ReadUtils;
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType; import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants; import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
@ -697,7 +699,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// Seems that at least with some test data we can lose genuine haploid variation if we use // Seems that at least with some test data we can lose genuine haploid variation if we use
// UGs engine with ploidy == 1 // UGs engine with ploidy == 1
simpleUAC.genotypeArgs.samplePloidy = Math.max(2,SCAC.genotypeArgs.samplePloidy); simpleUAC.genotypeArgs.samplePloidy = Math.max(2,SCAC.genotypeArgs.samplePloidy);
activeRegionEvaluationGenotyperEngine = new UnifiedGenotypingEngine(simpleUAC, toolkit);
activeRegionEvaluationGenotyperEngine = new UnifiedGenotypingEngine(simpleUAC,
FixedAFCalculatorProvider.createThreadSafeProvider(getToolkit(),simpleUAC,logger), toolkit);
activeRegionEvaluationGenotyperEngine.setLogger(logger); activeRegionEvaluationGenotyperEngine.setLogger(logger);
if( SCAC.CONTAMINATION_FRACTION_FILE != null ) if( SCAC.CONTAMINATION_FRACTION_FILE != null )
@ -707,7 +711,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other."); throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other.");
final GenomeLocParser genomeLocParser = toolkit.getGenomeLocParser(); final GenomeLocParser genomeLocParser = toolkit.getGenomeLocParser();
genotypingEngine = new HaplotypeCallerGenotypingEngine( SCAC, samplesList, genomeLocParser, !doNotRunPhysicalPhasing);
genotypingEngine = new HaplotypeCallerGenotypingEngine( SCAC, samplesList, genomeLocParser, FixedAFCalculatorProvider.createThreadSafeProvider(getToolkit(),SCAC,logger), !doNotRunPhysicalPhasing);
// initialize the output VCF header // initialize the output VCF header
final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
@ -880,7 +885,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// if we don't have any data, just abort early // if we don't have any data, just abort early
return new ActivityProfileState(ref.getLocus(), 0.0); return new ActivityProfileState(ref.getLocus(), 0.0);
final List<Allele> noCall = Collections.singletonList(Allele.NO_CALL); // used to noCall all genotypes until the exact model is applied final int ploidy = activeRegionEvaluationGenotyperEngine.getConfiguration().genotypeArgs.samplePloidy;
final List<Allele> noCall = GATKVariantContextUtils.noCallAlleles(ploidy); // used to noCall all genotypes until the exact model is applied
final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context); final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context);
final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size()); final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size());
final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage(); final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage();

View File

@ -51,8 +51,7 @@ import com.google.java.contract.Requires;
import htsjdk.variant.variantcontext.*; import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.genotyper.*; import org.broadinstitute.gatk.tools.walkers.genotyper.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalc; import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory;
import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.Utils; import org.broadinstitute.gatk.utils.Utils;
@ -71,7 +70,6 @@ import java.util.*;
*/ */
public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeCallerArgumentCollection> { public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeCallerArgumentCollection> {
private final static List<Allele> NO_CALL = Collections.singletonList(Allele.NO_CALL);
private static final int ALLELE_EXTENSION = 2; private static final int ALLELE_EXTENSION = 2;
private static final String phase01 = "0|1"; private static final String phase01 = "0|1";
private static final String phase10 = "1|0"; private static final String phase10 = "1|0";
@ -91,8 +89,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
* @param genomeLocParser {@inheritDoc} * @param genomeLocParser {@inheritDoc}
* @param doPhysicalPhasing whether to try physical phasing. * @param doPhysicalPhasing whether to try physical phasing.
*/ */
public HaplotypeCallerGenotypingEngine(final HaplotypeCallerArgumentCollection configuration, final SampleList samples, final GenomeLocParser genomeLocParser, final boolean doPhysicalPhasing) { public HaplotypeCallerGenotypingEngine(final HaplotypeCallerArgumentCollection configuration, final SampleList samples, final GenomeLocParser genomeLocParser, final AFCalculatorProvider afCalculatorProvider, final boolean doPhysicalPhasing) {
super(configuration,samples,genomeLocParser); super(configuration,samples,genomeLocParser,afCalculatorProvider);
if (genomeLocParser == null) if (genomeLocParser == null)
throw new IllegalArgumentException("the genome location parser provided cannot be null"); throw new IllegalArgumentException("the genome location parser provided cannot be null");
this.doPhysicalPhasing= doPhysicalPhasing; this.doPhysicalPhasing= doPhysicalPhasing;
@ -100,25 +98,6 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
genotypingModel = new InfiniteRandomMatingPopulationModel(); genotypingModel = new InfiniteRandomMatingPopulationModel();
} }
/**
* {@inheritDoc}
*/
public HaplotypeCallerGenotypingEngine(final HaplotypeCallerArgumentCollection configuration, final SampleList samples, final GenomeLocParser genomeLocParser) {
this(configuration,samples,genomeLocParser,false);
}
@Override
protected ThreadLocal<AFCalc> getAlleleFrequencyCalculatorThreadLocal() {
return new ThreadLocal<AFCalc>() {
@Override
public AFCalc initialValue() {
return AFCalcFactory.createAFCalc(configuration, numberOfGenomes,
configuration.emitReferenceConfidence != ReferenceConfidenceMode.NONE , logger);
}
};
}
/** /**
* Change the merge variant across haplotypes for this engine. * Change the merge variant across haplotypes for this engine.
* *
@ -226,6 +205,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
// Walk along each position in the key set and create each event to be outputted // Walk along each position in the key set and create each event to be outputted
final Set<Haplotype> calledHaplotypes = new HashSet<>(); final Set<Haplotype> calledHaplotypes = new HashSet<>();
final List<VariantContext> returnCalls = new ArrayList<>(); final List<VariantContext> returnCalls = new ArrayList<>();
final int ploidy = configuration.genotypeArgs.samplePloidy;
final List<Allele> noCallAlleles = GATKVariantContextUtils.noCallAlleles(ploidy);
for( final int loc : startPosKeySet ) { for( final int loc : startPosKeySet ) {
if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { // genotyping an event inside this active region if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { // genotyping an event inside this active region
@ -276,7 +257,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
if (emitReferenceConfidence) if (emitReferenceConfidence)
readAlleleLikelihoods.addNonReferenceAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE); readAlleleLikelihoods.addNonReferenceAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
final GenotypesContext genotypes = calculateGLsForThisEvent( readAlleleLikelihoods, mergedVC ); final GenotypesContext genotypes = calculateGLsForThisEvent( readAlleleLikelihoods, mergedVC, noCallAlleles );
final VariantContext call = calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel); final VariantContext call = calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel);
if( call != null ) { if( call != null ) {
@ -698,14 +679,14 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
*/ */
@Requires({"readLikelihoods!= null", "mergedVC != null"}) @Requires({"readLikelihoods!= null", "mergedVC != null"})
@Ensures("result != null") @Ensures("result != null")
private GenotypesContext calculateGLsForThisEvent( final ReadLikelihoods<Allele> readLikelihoods, final VariantContext mergedVC ) { private GenotypesContext calculateGLsForThisEvent( final ReadLikelihoods<Allele> readLikelihoods, final VariantContext mergedVC, final List<Allele> noCallAlleles ) {
final List<Allele> vcAlleles = mergedVC.getAlleles(); final List<Allele> vcAlleles = mergedVC.getAlleles();
final AlleleList<Allele> alleleList = readLikelihoods.alleleCount() == vcAlleles.size() ? readLikelihoods : new IndexedAlleleList<>(vcAlleles); final AlleleList<Allele> alleleList = readLikelihoods.alleleCount() == vcAlleles.size() ? readLikelihoods : new IndexedAlleleList<>(vcAlleles);
final GenotypingLikelihoods<Allele> likelihoods = genotypingModel.calculateLikelihoods(alleleList,new GenotypingData<>(ploidyModel,readLikelihoods)); final GenotypingLikelihoods<Allele> likelihoods = genotypingModel.calculateLikelihoods(alleleList,new GenotypingData<>(ploidyModel,readLikelihoods));
final int sampleCount = samples.sampleCount(); final int sampleCount = samples.sampleCount();
final GenotypesContext result = GenotypesContext.create(sampleCount); final GenotypesContext result = GenotypesContext.create(sampleCount);
for (int s = 0; s < sampleCount; s++) for (int s = 0; s < sampleCount; s++)
result.add(new GenotypeBuilder(samples.sampleAt(s)).alleles(NO_CALL).PL(likelihoods.sampleLikelihoods(s).getAsPLs()).make()); result.add(new GenotypeBuilder(samples.sampleAt(s)).alleles(noCallAlleles).PL(likelihoods.sampleLikelihoods(s).getAsPLs()).make());
return result; return result;
} }
@ -769,23 +750,6 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
return eventMapper; return eventMapper;
} }
@Ensures({"result.size() == haplotypeAllelesForSample.size()"})
protected static List<Allele> findEventAllelesInSample( final List<Allele> eventAlleles, final List<Allele> haplotypeAlleles, final List<Allele> haplotypeAllelesForSample, final List<List<Haplotype>> alleleMapper, final List<Haplotype> haplotypes ) {
if( haplotypeAllelesForSample.contains(Allele.NO_CALL) ) { return NO_CALL; }
final List<Allele> eventAllelesForSample = new ArrayList<>();
for( final Allele a : haplotypeAllelesForSample ) {
final Haplotype haplotype = haplotypes.get(haplotypeAlleles.indexOf(a));
for( int iii = 0; iii < alleleMapper.size(); iii++ ) {
final List<Haplotype> mappedHaplotypes = alleleMapper.get(iii);
if( mappedHaplotypes.contains(haplotype) ) {
eventAllelesForSample.add(eventAlleles.get(iii));
break;
}
}
}
return eventAllelesForSample;
}
@Deprecated @Deprecated
protected static Map<Integer,VariantContext> generateVCsFromAlignment( final Haplotype haplotype, final byte[] ref, final GenomeLoc refLoc, final String sourceNameToAdd ) { protected static Map<Integer,VariantContext> generateVCsFromAlignment( final Haplotype haplotype, final byte[] ref, final GenomeLoc refLoc, final String sourceNameToAdd ) {
return new EventMap(haplotype, ref, refLoc, sourceNameToAdd); return new EventMap(haplotype, ref, refLoc, sourceNameToAdd);

View File

@ -48,6 +48,7 @@ package org.broadinstitute.gatk.tools.walkers.validation;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.walkers.*; import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider;
import org.broadinstitute.gatk.utils.commandline.*; import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.CommandLineGATK; import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext; import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
@ -352,12 +353,15 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
final GenomeAnalysisEngine toolkit = getToolkit(); final GenomeAnalysisEngine toolkit = getToolkit();
uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
snpEngine = new UnifiedGenotypingEngine(uac,toolkit); snpEngine = new UnifiedGenotypingEngine(uac,
FixedAFCalculatorProvider.createThreadSafeProvider(toolkit, uac, logger),toolkit);
// Adding the INDEL calling arguments for UG // Adding the INDEL calling arguments for UG
UnifiedArgumentCollection uac_indel = uac.clone(); UnifiedArgumentCollection uac_indel = uac.clone();
uac_indel.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL; uac_indel.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL;
indelEngine = new UnifiedGenotypingEngine(uac_indel,toolkit); indelEngine = new UnifiedGenotypingEngine(uac_indel,
FixedAFCalculatorProvider.createThreadSafeProvider(toolkit, uac, logger),toolkit);
// make sure we have callConf set to the threshold set by the UAC so we can use it later. // make sure we have callConf set to the threshold set by the UAC so we can use it later.
callConf = uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING; callConf = uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING;

View File

@ -46,18 +46,20 @@
package org.broadinstitute.gatk.tools.walkers.validation.validationsiteselector; package org.broadinstitute.gatk.tools.walkers.validation.validationsiteselector;
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalc; import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalc;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory; import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcResult; import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcResult;
import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import java.util.TreeSet; import java.util.TreeSet;
public class GLBasedSampleSelector extends SampleSelector { public class GLBasedSampleSelector extends SampleSelector {
private static final int MAX_ALT_ALLELES = 4;
double[] flatPriors = null; double[] flatPriors = null;
final double referenceLikelihood; final double referenceLikelihood;
AFCalc AFCalculator; AFCalc afCalculator;
public GLBasedSampleSelector(TreeSet<String> sm, double refLik) { public GLBasedSampleSelector(TreeSet<String> sm, double refLik) {
super(sm); super(sm);
@ -78,9 +80,9 @@ public class GLBasedSampleSelector extends SampleSelector {
// do we want to apply a prior? maybe user-spec? // do we want to apply a prior? maybe user-spec?
if ( flatPriors == null ) { if ( flatPriors == null ) {
flatPriors = new double[1+2*samples.size()]; flatPriors = new double[1+2*samples.size()];
AFCalculator = AFCalcFactory.createAFCalc(samples.size(), 4, 2); afCalculator = AFCalcFactory.createCalculator(samples.size(), MAX_ALT_ALLELES, HomoSapiensConstants.DEFAULT_PLOIDY);
} }
final AFCalcResult result = AFCalculator.getLog10PNonRef(subContext, flatPriors); final AFCalcResult result = afCalculator.getLog10PNonRef(subContext, HomoSapiensConstants.DEFAULT_PLOIDY, MAX_ALT_ALLELES, flatPriors);
// do we want to let this qual go up or down? // do we want to let this qual go up or down?
if ( result.getLog10LikelihoodOfAFEq0() < referenceLikelihood ) { if ( result.getLog10LikelihoodOfAFEq0() < referenceLikelihood ) {
return true; return true;

View File

@ -63,11 +63,11 @@ import org.broadinstitute.gatk.engine.walkers.Window;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.genotyper.*; import org.broadinstitute.gatk.tools.walkers.genotyper.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.GeneralPloidyFailOverAFCalculatorProvider;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller; import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller;
import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.SampleUtils; import org.broadinstitute.gatk.utils.SampleUtils;
import org.broadinstitute.gatk.utils.commandline.*; import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
import org.broadinstitute.gatk.utils.help.HelpConstants; import org.broadinstitute.gatk.utils.help.HelpConstants;
import org.broadinstitute.gatk.utils.variant.GATKVCFUtils; import org.broadinstitute.gatk.utils.variant.GATKVCFUtils;
@ -166,7 +166,8 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
final Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(toolkit, variants); final Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(toolkit, variants);
final SampleList samples = new IndexedSampleList(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); final SampleList samples = new IndexedSampleList(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
// create the genotyping engine // create the genotyping engine
genotypingEngine = new UnifiedGenotypingEngine(createUAC(), samples, toolkit.getGenomeLocParser(), toolkit.getArguments().BAQMode); genotypingEngine = new UnifiedGenotypingEngine(createUAC(), samples, toolkit.getGenomeLocParser(), GeneralPloidyFailOverAFCalculatorProvider.createThreadSafeProvider(toolkit, genotypeArgs, logger),
toolkit.getArguments().BAQMode);
// create the annotation engine // create the annotation engine
annotationEngine = new VariantAnnotatorEngine(Arrays.asList("none"), annotationsToUse, Collections.<String>emptyList(), this, toolkit); annotationEngine = new VariantAnnotatorEngine(Arrays.asList("none"), annotationsToUse, Collections.<String>emptyList(), this, toolkit);
@ -182,6 +183,8 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
final Set<String> sampleNameSet = SampleListUtils.asSet(samples); final Set<String> sampleNameSet = SampleListUtils.asSet(samples);
final VCFHeader vcfHeader = new VCFHeader(headerLines, sampleNameSet); final VCFHeader vcfHeader = new VCFHeader(headerLines, sampleNameSet);
vcfWriter.writeHeader(vcfHeader); vcfWriter.writeHeader(vcfHeader);
logger.info("Notice that the -ploidy parameter is ignored in " + getClass().getSimpleName() + " tool as this is automatically determined by the input variant files");
} }
public VariantContext map(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { public VariantContext map(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
@ -192,23 +195,9 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
final VariantContext combinedVC = ReferenceConfidenceVariantContextMerger.merge(tracker.getPrioritizedValue(variants, loc), loc, INCLUDE_NON_VARIANTS ? ref.getBase() : null, true); final VariantContext combinedVC = ReferenceConfidenceVariantContextMerger.merge(tracker.getPrioritizedValue(variants, loc), loc, INCLUDE_NON_VARIANTS ? ref.getBase() : null, true);
if ( combinedVC == null ) if ( combinedVC == null )
return null; return null;
checkPloidy(combinedVC);
return regenotypeVC(tracker, ref, combinedVC); return regenotypeVC(tracker, ref, combinedVC);
} }
private void checkPloidy(final VariantContext combinedVC) {
final int requiredPloidy = genotypeArgs.samplePloidy;
for (final Genotype g : combinedVC.getGenotypes()) {
if (g.getPloidy() != requiredPloidy) {
throw new UserException.BadArgumentValue("ploidy",
"the input variant data contains calls with a different ploidy than the one specified (" + requiredPloidy
+ "). For example sample (" + g.getSampleName() + ") at (" + combinedVC.getChr() + ":" + combinedVC.getStart() + ")");
}
}
}
/** /**
* Re-genotype (and re-annotate) a combined genomic VC * Re-genotype (and re-annotate) a combined genomic VC
* *

View File

@ -50,6 +50,7 @@ import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.tools.walkers.genotyper.IndexedSampleList; import org.broadinstitute.gatk.tools.walkers.genotyper.IndexedSampleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleList; import org.broadinstitute.gatk.tools.walkers.genotyper.SampleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleListUtils; import org.broadinstitute.gatk.tools.walkers.genotyper.SampleListUtils;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider;
import org.broadinstitute.gatk.utils.commandline.ArgumentCollection; import org.broadinstitute.gatk.utils.commandline.ArgumentCollection;
import org.broadinstitute.gatk.utils.commandline.Output; import org.broadinstitute.gatk.utils.commandline.Output;
import org.broadinstitute.gatk.engine.CommandLineGATK; import org.broadinstitute.gatk.engine.CommandLineGATK;
@ -129,7 +130,9 @@ public class RegenotypeVariants extends RodWalker<Integer, Integer> implements T
final SampleList samples = final SampleList samples =
new IndexedSampleList(SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName))); new IndexedSampleList(SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)));
final Set<String> sampleNameSet = SampleListUtils.asSet(samples); final Set<String> sampleNameSet = SampleListUtils.asSet(samples);
UG_engine = new UnifiedGenotypingEngine(UAC, samples,toolkit.getGenomeLocParser(),toolkit.getArguments().BAQMode); UG_engine = new UnifiedGenotypingEngine(UAC, samples,toolkit.getGenomeLocParser(),
FixedAFCalculatorProvider.createThreadSafeProvider(toolkit,UAC,logger),
toolkit.getArguments().BAQMode);
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(trackName))); hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(trackName)));

View File

@ -293,11 +293,12 @@ public class MathUtils {
public static double log10sumLog10(final double[] log10p, final int start, final int finish) { public static double log10sumLog10(final double[] log10p, final int start, final int finish) {
if (start >= finish)
return Double.NEGATIVE_INFINITY;
final int maxElementIndex = MathUtils.maxElementIndex(log10p, start, finish); final int maxElementIndex = MathUtils.maxElementIndex(log10p, start, finish);
double maxValue = log10p[maxElementIndex]; final double maxValue = log10p[maxElementIndex];
if(maxValue == Double.NEGATIVE_INFINITY) { if(maxValue == Double.NEGATIVE_INFINITY)
return maxValue; return maxValue;
}
double sum = 1.0; double sum = 1.0;
for (int i = start; i < finish; i++) { for (int i = start; i < finish; i++) {
double curVal = log10p[i]; double curVal = log10p[i];
@ -888,6 +889,7 @@ public class MathUtils {
if (start > endIndex) { if (start > endIndex) {
throw new IllegalArgumentException("Start cannot be after end."); throw new IllegalArgumentException("Start cannot be after end.");
} }
int maxI = start; int maxI = start;
for (int i = (start+1); i < endIndex; i++) { for (int i = (start+1); i < endIndex; i++) {
if (array[i] > array[maxI]) if (array[i] > array[maxI])

View File

@ -47,7 +47,14 @@ public class GATKVariantContextUtils {
public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
/**
* Diploid NO_CALL allele list...
*
* @deprecated you should use {@link #noCallAlleles(int)} instead. It indicates the presence of a hardcoded diploid assumption which is bad.
*/
@Deprecated
public final static List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); public final static List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
public final static String NON_REF_SYMBOLIC_ALLELE_NAME = "NON_REF"; public final static String NON_REF_SYMBOLIC_ALLELE_NAME = "NON_REF";
public final static Allele NON_REF_SYMBOLIC_ALLELE = Allele.create("<"+NON_REF_SYMBOLIC_ALLELE_NAME+">", false); // represents any possible non-ref allele at this site public final static Allele NON_REF_SYMBOLIC_ALLELE = Allele.create("<"+NON_REF_SYMBOLIC_ALLELE_NAME+">", false); // represents any possible non-ref allele at this site
@ -143,6 +150,26 @@ public class GATKVariantContextUtils {
return ref; return ref;
} }
/**
* Calculates the total ploidy of a variant context as the sum of all plodies across genotypes.
* @param vc the target variant context.
* @param defaultPloidy the default ploidy to be assume when there is no ploidy information for a genotype.
* @return never {@code null}.
*/
public static int totalPloidy(final VariantContext vc, final int defaultPloidy) {
if (vc == null)
throw new IllegalArgumentException("the vc provided cannot be null");
if (defaultPloidy < 0)
throw new IllegalArgumentException("the default ploidy must 0 or greater");
int result = 0;
for (final Genotype genotype : vc.getGenotypes()) {
final int declaredPloidy = genotype.getPloidy();
result += declaredPloidy <= 0 ? declaredPloidy : declaredPloidy;
}
return result;
}
public enum GenotypeMergeType { public enum GenotypeMergeType {
/** /**
* Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD. * Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD.
@ -1322,6 +1349,28 @@ public class GATKVariantContextUtils {
} }
} }
/**
* Cached NO_CALL immutable lists where the position ith contains the list with i elements.
*/
private static List<Allele>[] NOCALL_LISTS = new List[] {
Collections.emptyList(),
Collections.singletonList(Allele.NO_CALL),
Collections.nCopies(2,Allele.NO_CALL)
};
/**
* Synchronized code to ensure that {@link #NOCALL_LISTS} has enough entries beyod the requested ploidy
* @param capacity the requested ploidy.
*/
private static synchronized void ensureNoCallListsCapacity(final int capacity) {
final int currentCapacity = NOCALL_LISTS.length - 1;
if (currentCapacity >= capacity)
return;
NOCALL_LISTS = Arrays.copyOf(NOCALL_LISTS,Math.max(capacity,currentCapacity << 1) + 1);
for (int i = currentCapacity + 1; i < NOCALL_LISTS.length; i++)
NOCALL_LISTS[i] = Collections.nCopies(i,Allele.NO_CALL);
}
/** /**
* Returns a {@link Allele#NO_CALL NO_CALL} allele list provided the ploidy. * Returns a {@link Allele#NO_CALL NO_CALL} allele list provided the ploidy.
* *
@ -1331,16 +1380,9 @@ public class GATKVariantContextUtils {
* might or might not be mutable. * might or might not be mutable.
*/ */
public static List<Allele> noCallAlleles(final int ploidy) { public static List<Allele> noCallAlleles(final int ploidy) {
if (ploidy <= 0) if (NOCALL_LISTS.length <= ploidy)
return Collections.EMPTY_LIST; ensureNoCallListsCapacity(ploidy);
else if (ploidy == 1) return NOCALL_LISTS[ploidy];
return Collections.singletonList(Allele.NO_CALL);
else {
final List<Allele> result = new ArrayList<>(ploidy);
for (int i = 0; i < ploidy; i++)
result.add(Allele.NO_CALL);
return result;
}
} }