From 3cdeab6e9e836a50503d8eff215da57a2d5de918 Mon Sep 17 00:00:00 2001 From: Valentin Ruano-Rubio Date: Wed, 10 Sep 2014 13:37:21 -0400 Subject: [PATCH] GenotypingEngines and walkers now use AFCalc(ulator) providers rathern than instanciate their own (fixed) calculators directly. Changes: ------- * GenotypingEngine uses now a AFCalc provider instead of its own thread-local with one-time initialized and fixed AF calculator. * All walkers that use a GenotypingEngine now are passing the appropiate AF calculator provider. For now most just use a fix calculator (FixedAFCalculatorProvider) except GenotypeGVCFs as this one now can cope with mixture of ploidies failing-over to a general-ploidy calculator when the preferred implementation is not capable to handle a site's analysis. --- ...dyGenotypeLikelihoodsCalculationModel.java | 5 +- .../walkers/genotyper/GenotypingEngine.java | 36 +++++------ ...elGenotypeLikelihoodsCalculationModel.java | 6 +- ...NPGenotypeLikelihoodsCalculationModel.java | 5 +- .../walkers/genotyper/UnifiedGenotyper.java | 26 +++++--- .../genotyper/UnifiedGenotypingEngine.java | 15 +++-- .../genotyper/afcalc/AFCalcTestBuilder.java | 7 ++- .../haplotypecaller/HaplotypeCaller.java | 12 +++- .../HaplotypeCallerGenotypingEngine.java | 52 +++------------- .../validation/GenotypeAndValidate.java | 8 ++- .../GLBasedSampleSelector.java | 10 +-- .../walkers/variantutils/GenotypeGVCFs.java | 21 ++----- .../variantutils/RegenotypeVariants.java | 5 +- .../broadinstitute/gatk/utils/MathUtils.java | 8 ++- .../variant/GATKVariantContextUtils.java | 62 ++++++++++++++++--- 15 files changed, 153 insertions(+), 125 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java index 9206fbc37..d649dcc08 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java @@ -60,6 +60,7 @@ import org.broadinstitute.gatk.utils.collections.Pair; import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup; import htsjdk.variant.variantcontext.*; +import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; import java.util.*; @@ -286,8 +287,8 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G builder.attributes(attributes); // create the genotypes; no-call everyone for now final GenotypesContext genotypes = GenotypesContext.create(); - final List noCall = new ArrayList(); - noCall.add(Allele.NO_CALL); + final int ploidy = UAC.genotypeArgs.samplePloidy; + final List noCall = GATKVariantContextUtils.noCallAlleles(ploidy); for ( PoolGenotypeData sampleData : GLs ) { // extract from multidimensional array diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java index a98cd5c88..20bbe0b74 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypingEngine.java @@ -60,8 +60,8 @@ import org.broadinstitute.gatk.engine.contexts.ReferenceContext; import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalc; -import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory; import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcResult; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider; import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.utils.MathUtils; @@ -81,8 +81,11 @@ import java.util.*; public abstract class GenotypingEngine { public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA"; + public static final String LOW_QUAL_FILTER_NAME = "LowQual"; + protected final AFCalculatorProvider afCalculatorProvider ; + protected Logger logger; protected final Config configuration; @@ -100,20 +103,6 @@ public abstract class GenotypingEngine afcm = getAlleleFrequencyCalculatorThreadLocal(); - - protected ThreadLocal getAlleleFrequencyCalculatorThreadLocal() { - return new ThreadLocal() { - - @Override - public AFCalc initialValue() { - return AFCalcFactory.createAFCalc(configuration, numberOfGenomes, false, logger); - } - }; - } - - /** * Construct a new genotyper engine, on a specific subset of samples. * @@ -124,10 +113,17 @@ public abstract class GenotypingEngine noCall = new ArrayList(); - noCall.add(Allele.NO_CALL); + final int ploidy = UAC.genotypeArgs.samplePloidy; + final List noCall = GATKVariantContextUtils.noCallAlleles(ploidy); // For each sample, get genotype likelihoods based on pileup // compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them. @@ -148,6 +149,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood final GenotypeBuilder b = new GenotypeBuilder(sample.getKey()); final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap.get(sample.getKey()), UAC.getSampleContamination().get(sample.getKey())); b.PL(genotypeLikelihoods); + b.alleles(noCall); b.DP(getFilteredDepth(pileup)); genotypes.add(b.make()); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 5e8914c8e..dad2780cd 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -64,6 +64,7 @@ import org.broadinstitute.gatk.utils.pileup.PileupElement; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileupImpl; import htsjdk.variant.variantcontext.*; +import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; import java.util.*; @@ -180,7 +181,8 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC // create the genotypes; no-call everyone for now final GenotypesContext genotypes = GenotypesContext.create(); - + final int ploidy = UAC.genotypeArgs.samplePloidy; + final List noCall = GATKVariantContextUtils.noCallAlleles(ploidy); for ( SampleGenotypeData sampleData : GLs ) { final double[] allLikelihoods = sampleData.GL.getLikelihoods(); final double[] myLikelihoods = new double[numLikelihoods]; @@ -193,6 +195,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC final double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(myLikelihoods, false, true); gb.PL(genotypeLikelihoods); gb.DP(sampleData.depth); + gb.alleles(noCall); if (UAC.annotateAllSitesWithPLs) gb.attribute(UnifiedGenotypingEngine.PL_FOR_ALL_SNP_ALLELES_KEY,GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(allLikelihoods, false, true))); genotypes.add(gb.make()); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyper.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyper.java index 9a2d123e5..f75c2be05 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyper.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyper.java @@ -46,10 +46,12 @@ package org.broadinstitute.gatk.tools.walkers.genotyper; -import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; -import org.broadinstitute.gatk.engine.walkers.*; -import org.broadinstitute.gatk.utils.commandline.*; +import htsjdk.variant.variantcontext.GenotypeLikelihoods; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.writer.VariantContextWriter; +import htsjdk.variant.vcf.*; import org.broadinstitute.gatk.engine.CommandLineGATK; +import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection; import org.broadinstitute.gatk.engine.contexts.AlignmentContext; import org.broadinstitute.gatk.engine.contexts.ReferenceContext; @@ -59,18 +61,18 @@ import org.broadinstitute.gatk.engine.filters.BadMateFilter; import org.broadinstitute.gatk.engine.filters.MappingQualityUnavailableFilter; import org.broadinstitute.gatk.engine.iterators.ReadTransformer; import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker; +import org.broadinstitute.gatk.engine.walkers.*; import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider; import org.broadinstitute.gatk.utils.SampleUtils; import org.broadinstitute.gatk.utils.baq.BAQ; -import org.broadinstitute.gatk.utils.help.HelpConstants; -import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; -import htsjdk.variant.vcf.*; +import org.broadinstitute.gatk.utils.commandline.*; import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; -import htsjdk.variant.variantcontext.GenotypeLikelihoods; -import htsjdk.variant.variantcontext.VariantContext; -import htsjdk.variant.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.gatk.utils.help.HelpConstants; +import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; import java.io.PrintStream; import java.util.*; @@ -284,7 +286,9 @@ public class UnifiedGenotyper extends LocusWalker, Unif verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP"); final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); - genotypingEngine = new UnifiedGenotypingEngine(UAC, samples, toolkit.getGenomeLocParser(), toolkit.getArguments().BAQMode); + + final AFCalculatorProvider afCalcAFCalculatorProvider = FixedAFCalculatorProvider.createThreadSafeProvider(getToolkit(),UAC,logger); + genotypingEngine = new UnifiedGenotypingEngine(UAC, samples, toolkit.getGenomeLocParser(), afCalcAFCalculatorProvider, toolkit.getArguments().BAQMode); genotypingEngine.setVerboseWriter(verboseWriter); genotypingEngine.setAnnotationEngine(annotationEngine); @@ -308,6 +312,8 @@ public class UnifiedGenotyper extends LocusWalker, Unif writer.writeHeader(new VCFHeader(headerInfo, samplesForHeader)); } + + public static Set getHeaderInfo(final UnifiedArgumentCollection UAC, final VariantAnnotatorEngine annotationEngine, final DbsnpArgumentCollection dbsnp) { diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotypingEngine.java index f2bea114a..7fa6b172d 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotypingEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotypingEngine.java @@ -55,6 +55,7 @@ import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils; import org.broadinstitute.gatk.engine.contexts.ReferenceContext; import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcResult; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider; import org.broadinstitute.gatk.utils.BaseUtils; import org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.utils.baq.BAQ; @@ -104,9 +105,9 @@ public class UnifiedGenotypingEngine extends GenotypingEngine perReadAlleleLikelihoodMap) { final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts,orientation, allAllelesToUse, false, model, perReadAlleleLikelihoodMap); - return afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model)); + final int defaultPloidy = configuration.genotypeArgs.samplePloidy; + final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES; + return afCalculatorProvider.getInstance(vc, defaultPloidy, maxAltAlleles).getLog10PNonRef(vc, defaultPloidy, maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model)); } private Map getFilteredAndStratifiedContexts(final ReferenceContext refContext, @@ -480,7 +483,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine, In // Seems that at least with some test data we can lose genuine haploid variation if we use // UGs engine with ploidy == 1 simpleUAC.genotypeArgs.samplePloidy = Math.max(2,SCAC.genotypeArgs.samplePloidy); - activeRegionEvaluationGenotyperEngine = new UnifiedGenotypingEngine(simpleUAC, toolkit); + + activeRegionEvaluationGenotyperEngine = new UnifiedGenotypingEngine(simpleUAC, + FixedAFCalculatorProvider.createThreadSafeProvider(getToolkit(),simpleUAC,logger), toolkit); activeRegionEvaluationGenotyperEngine.setLogger(logger); if( SCAC.CONTAMINATION_FRACTION_FILE != null ) @@ -707,7 +711,8 @@ public class HaplotypeCaller extends ActiveRegionWalker, In throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other."); final GenomeLocParser genomeLocParser = toolkit.getGenomeLocParser(); - genotypingEngine = new HaplotypeCallerGenotypingEngine( SCAC, samplesList, genomeLocParser, !doNotRunPhysicalPhasing); + + genotypingEngine = new HaplotypeCallerGenotypingEngine( SCAC, samplesList, genomeLocParser, FixedAFCalculatorProvider.createThreadSafeProvider(getToolkit(),SCAC,logger), !doNotRunPhysicalPhasing); // initialize the output VCF header final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); @@ -880,7 +885,8 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // if we don't have any data, just abort early return new ActivityProfileState(ref.getLocus(), 0.0); - final List noCall = Collections.singletonList(Allele.NO_CALL); // used to noCall all genotypes until the exact model is applied + final int ploidy = activeRegionEvaluationGenotyperEngine.getConfiguration().genotypeArgs.samplePloidy; + final List noCall = GATKVariantContextUtils.noCallAlleles(ploidy); // used to noCall all genotypes until the exact model is applied final Map splitContexts = AlignmentContextUtils.splitContextBySampleName(context); final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size()); final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage(); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java index 7f19861bc..d1a569c8a 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java @@ -51,8 +51,7 @@ import com.google.java.contract.Requires; import htsjdk.variant.variantcontext.*; import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.tools.walkers.genotyper.*; -import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalc; -import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider; import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.utils.Utils; @@ -71,7 +70,6 @@ import java.util.*; */ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine { - private final static List NO_CALL = Collections.singletonList(Allele.NO_CALL); private static final int ALLELE_EXTENSION = 2; private static final String phase01 = "0|1"; private static final String phase10 = "1|0"; @@ -91,8 +89,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine getAlleleFrequencyCalculatorThreadLocal() { - return new ThreadLocal() { - - @Override - public AFCalc initialValue() { - return AFCalcFactory.createAFCalc(configuration, numberOfGenomes, - configuration.emitReferenceConfidence != ReferenceConfidenceMode.NONE , logger); - } - }; - } - /** * Change the merge variant across haplotypes for this engine. * @@ -226,6 +205,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine calledHaplotypes = new HashSet<>(); final List returnCalls = new ArrayList<>(); + final int ploidy = configuration.genotypeArgs.samplePloidy; + final List noCallAlleles = GATKVariantContextUtils.noCallAlleles(ploidy); for( final int loc : startPosKeySet ) { if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { // genotyping an event inside this active region @@ -276,7 +257,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine readLikelihoods, final VariantContext mergedVC ) { + private GenotypesContext calculateGLsForThisEvent( final ReadLikelihoods readLikelihoods, final VariantContext mergedVC, final List noCallAlleles ) { final List vcAlleles = mergedVC.getAlleles(); final AlleleList alleleList = readLikelihoods.alleleCount() == vcAlleles.size() ? readLikelihoods : new IndexedAlleleList<>(vcAlleles); final GenotypingLikelihoods likelihoods = genotypingModel.calculateLikelihoods(alleleList,new GenotypingData<>(ploidyModel,readLikelihoods)); final int sampleCount = samples.sampleCount(); final GenotypesContext result = GenotypesContext.create(sampleCount); for (int s = 0; s < sampleCount; s++) - result.add(new GenotypeBuilder(samples.sampleAt(s)).alleles(NO_CALL).PL(likelihoods.sampleLikelihoods(s).getAsPLs()).make()); + result.add(new GenotypeBuilder(samples.sampleAt(s)).alleles(noCallAlleles).PL(likelihoods.sampleLikelihoods(s).getAsPLs()).make()); return result; } @@ -769,23 +750,6 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine findEventAllelesInSample( final List eventAlleles, final List haplotypeAlleles, final List haplotypeAllelesForSample, final List> alleleMapper, final List haplotypes ) { - if( haplotypeAllelesForSample.contains(Allele.NO_CALL) ) { return NO_CALL; } - final List eventAllelesForSample = new ArrayList<>(); - for( final Allele a : haplotypeAllelesForSample ) { - final Haplotype haplotype = haplotypes.get(haplotypeAlleles.indexOf(a)); - for( int iii = 0; iii < alleleMapper.size(); iii++ ) { - final List mappedHaplotypes = alleleMapper.get(iii); - if( mappedHaplotypes.contains(haplotype) ) { - eventAllelesForSample.add(eventAlleles.get(iii)); - break; - } - } - } - return eventAllelesForSample; - } - @Deprecated protected static Map generateVCsFromAlignment( final Haplotype haplotype, final byte[] ref, final GenomeLoc refLoc, final String sourceNameToAdd ) { return new EventMap(haplotype, ref, refLoc, sourceNameToAdd); diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/GenotypeAndValidate.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/GenotypeAndValidate.java index f78519f3f..8e67691be 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/GenotypeAndValidate.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/validation/GenotypeAndValidate.java @@ -48,6 +48,7 @@ package org.broadinstitute.gatk.tools.walkers.validation; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.engine.walkers.*; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider; import org.broadinstitute.gatk.utils.commandline.*; import org.broadinstitute.gatk.engine.CommandLineGATK; import org.broadinstitute.gatk.engine.contexts.AlignmentContext; @@ -352,12 +353,15 @@ public class GenotypeAndValidate extends RodWalker sm, double refLik) { super(sm); @@ -78,9 +80,9 @@ public class GLBasedSampleSelector extends SampleSelector { // do we want to apply a prior? maybe user-spec? if ( flatPriors == null ) { flatPriors = new double[1+2*samples.size()]; - AFCalculator = AFCalcFactory.createAFCalc(samples.size(), 4, 2); + afCalculator = AFCalcFactory.createCalculator(samples.size(), MAX_ALT_ALLELES, HomoSapiensConstants.DEFAULT_PLOIDY); } - final AFCalcResult result = AFCalculator.getLog10PNonRef(subContext, flatPriors); + final AFCalcResult result = afCalculator.getLog10PNonRef(subContext, HomoSapiensConstants.DEFAULT_PLOIDY, MAX_ALT_ALLELES, flatPriors); // do we want to let this qual go up or down? if ( result.getLog10LikelihoodOfAFEq0() < referenceLikelihood ) { return true; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java index 80f5afa25..c54cce704 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java @@ -63,11 +63,11 @@ import org.broadinstitute.gatk.engine.walkers.Window; import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.gatk.tools.walkers.genotyper.*; +import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.GeneralPloidyFailOverAFCalculatorProvider; import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller; import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.SampleUtils; import org.broadinstitute.gatk.utils.commandline.*; -import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; import org.broadinstitute.gatk.utils.help.HelpConstants; import org.broadinstitute.gatk.utils.variant.GATKVCFUtils; @@ -166,7 +166,8 @@ public class GenotypeGVCFs extends RodWalker vcfRods = GATKVCFUtils.getVCFHeadersFromRods(toolkit, variants); final SampleList samples = new IndexedSampleList(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); // create the genotyping engine - genotypingEngine = new UnifiedGenotypingEngine(createUAC(), samples, toolkit.getGenomeLocParser(), toolkit.getArguments().BAQMode); + genotypingEngine = new UnifiedGenotypingEngine(createUAC(), samples, toolkit.getGenomeLocParser(), GeneralPloidyFailOverAFCalculatorProvider.createThreadSafeProvider(toolkit, genotypeArgs, logger), + toolkit.getArguments().BAQMode); // create the annotation engine annotationEngine = new VariantAnnotatorEngine(Arrays.asList("none"), annotationsToUse, Collections.emptyList(), this, toolkit); @@ -182,6 +183,8 @@ public class GenotypeGVCFs extends RodWalker sampleNameSet = SampleListUtils.asSet(samples); final VCFHeader vcfHeader = new VCFHeader(headerLines, sampleNameSet); vcfWriter.writeHeader(vcfHeader); + + logger.info("Notice that the -ploidy parameter is ignored in " + getClass().getSimpleName() + " tool as this is automatically determined by the input variant files"); } public VariantContext map(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { @@ -192,23 +195,9 @@ public class GenotypeGVCFs extends RodWalker implements T final SampleList samples = new IndexedSampleList(SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName))); final Set sampleNameSet = SampleListUtils.asSet(samples); - UG_engine = new UnifiedGenotypingEngine(UAC, samples,toolkit.getGenomeLocParser(),toolkit.getArguments().BAQMode); + UG_engine = new UnifiedGenotypingEngine(UAC, samples,toolkit.getGenomeLocParser(), + FixedAFCalculatorProvider.createThreadSafeProvider(toolkit,UAC,logger), + toolkit.getArguments().BAQMode); final Set hInfo = new HashSet(); hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(trackName))); diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/MathUtils.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/MathUtils.java index 4e47da387..01aa13354 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/MathUtils.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/MathUtils.java @@ -293,11 +293,12 @@ public class MathUtils { public static double log10sumLog10(final double[] log10p, final int start, final int finish) { + if (start >= finish) + return Double.NEGATIVE_INFINITY; final int maxElementIndex = MathUtils.maxElementIndex(log10p, start, finish); - double maxValue = log10p[maxElementIndex]; - if(maxValue == Double.NEGATIVE_INFINITY) { + final double maxValue = log10p[maxElementIndex]; + if(maxValue == Double.NEGATIVE_INFINITY) return maxValue; - } double sum = 1.0; for (int i = start; i < finish; i++) { double curVal = log10p[i]; @@ -888,6 +889,7 @@ public class MathUtils { if (start > endIndex) { throw new IllegalArgumentException("Start cannot be after end."); } + int maxI = start; for (int i = (start+1); i < endIndex; i++) { if (array[i] > array[maxI]) diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java index 337f64345..3a9f1ac8a 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java @@ -47,7 +47,14 @@ public class GATKVariantContextUtils { public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. + /** + * Diploid NO_CALL allele list... + * + * @deprecated you should use {@link #noCallAlleles(int)} instead. It indicates the presence of a hardcoded diploid assumption which is bad. + */ + @Deprecated public final static List NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); + public final static String NON_REF_SYMBOLIC_ALLELE_NAME = "NON_REF"; public final static Allele NON_REF_SYMBOLIC_ALLELE = Allele.create("<"+NON_REF_SYMBOLIC_ALLELE_NAME+">", false); // represents any possible non-ref allele at this site @@ -143,6 +150,26 @@ public class GATKVariantContextUtils { return ref; } + /** + * Calculates the total ploidy of a variant context as the sum of all plodies across genotypes. + * @param vc the target variant context. + * @param defaultPloidy the default ploidy to be assume when there is no ploidy information for a genotype. + * @return never {@code null}. + */ + public static int totalPloidy(final VariantContext vc, final int defaultPloidy) { + if (vc == null) + throw new IllegalArgumentException("the vc provided cannot be null"); + if (defaultPloidy < 0) + throw new IllegalArgumentException("the default ploidy must 0 or greater"); + int result = 0; + for (final Genotype genotype : vc.getGenotypes()) { + final int declaredPloidy = genotype.getPloidy(); + result += declaredPloidy <= 0 ? declaredPloidy : declaredPloidy; + } + + return result; + } + public enum GenotypeMergeType { /** * Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD. @@ -1322,6 +1349,28 @@ public class GATKVariantContextUtils { } } + /** + * Cached NO_CALL immutable lists where the position ith contains the list with i elements. + */ + private static List[] NOCALL_LISTS = new List[] { + Collections.emptyList(), + Collections.singletonList(Allele.NO_CALL), + Collections.nCopies(2,Allele.NO_CALL) + }; + + /** + * Synchronized code to ensure that {@link #NOCALL_LISTS} has enough entries beyod the requested ploidy + * @param capacity the requested ploidy. + */ + private static synchronized void ensureNoCallListsCapacity(final int capacity) { + final int currentCapacity = NOCALL_LISTS.length - 1; + if (currentCapacity >= capacity) + return; + NOCALL_LISTS = Arrays.copyOf(NOCALL_LISTS,Math.max(capacity,currentCapacity << 1) + 1); + for (int i = currentCapacity + 1; i < NOCALL_LISTS.length; i++) + NOCALL_LISTS[i] = Collections.nCopies(i,Allele.NO_CALL); + } + /** * Returns a {@link Allele#NO_CALL NO_CALL} allele list provided the ploidy. * @@ -1331,16 +1380,9 @@ public class GATKVariantContextUtils { * might or might not be mutable. */ public static List noCallAlleles(final int ploidy) { - if (ploidy <= 0) - return Collections.EMPTY_LIST; - else if (ploidy == 1) - return Collections.singletonList(Allele.NO_CALL); - else { - final List result = new ArrayList<>(ploidy); - for (int i = 0; i < ploidy; i++) - result.add(Allele.NO_CALL); - return result; - } + if (NOCALL_LISTS.length <= ploidy) + ensureNoCallListsCapacity(ploidy); + return NOCALL_LISTS[ploidy]; }