From bba63a3b0ed94bf4d604d5a7f15e33f0f52fa930 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 12 Dec 2012 20:25:45 +0000 Subject: [PATCH] Fix for GSA-615: UnifiedGenotyperEngine.getGLModelsToUse takes 5% of the runtime of UG, should be optimized away. --- .../GenotypeLikelihoodsCalculationModel.java | 4 +- .../genotyper/UnifiedGenotyperEngine.java | 74 ++++++++++--------- 2 files changed, 42 insertions(+), 36 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index ae9b01f2d..a8ee4afde 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -59,8 +59,8 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { public enum Model { SNP, INDEL, - GeneralPloidySNP, - GeneralPloidyINDEL, + GENERALPLOIDYSNP, + GENERALPLOIDYINDEL, BOTH } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index cc086b148..8f2588679 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -52,7 +52,7 @@ import java.util.*; public class UnifiedGenotyperEngine { public static final String LOW_QUAL_FILTER_NAME = "LowQual"; - private static final String GPSTRING = "GeneralPloidy"; + private static final String GPSTRING = "GENERALPLOIDY"; public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA"; @@ -79,6 +79,7 @@ public class UnifiedGenotyperEngine { // the model used for calculating genotypes private ThreadLocal> glcm = new ThreadLocal>(); + private final List modelsToUse = new ArrayList(2); // the model used for calculating p(non-ref) private ThreadLocal afcm = new ThreadLocal(); @@ -134,6 +135,8 @@ public class UnifiedGenotyperEngine { computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, UAC.INDEL_HETEROZYGOSITY); filter.add(LOW_QUAL_FILTER_NAME); + + determineGLModelsToUse(); } /** @@ -286,7 +289,7 @@ public class UnifiedGenotyperEngine { glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC)); } - return glcm.get().get(model.name().toUpperCase()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser, perReadAlleleLikelihoodMap); + return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser, perReadAlleleLikelihoodMap); } private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, AlignmentContext rawContext) { @@ -634,48 +637,51 @@ public class UnifiedGenotyperEngine { (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES && QualityUtils.phredScaleErrorRate(PofF) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING); } + private void determineGLModelsToUse() { + + String modelPrefix = ""; + if ( !UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY ) + modelPrefix = GPSTRING; + + if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) { + modelPrefix += UAC.GLmodel.name().toUpperCase().replaceAll("BOTH",""); + modelsToUse.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP")); + modelsToUse.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL")); + } + else { + modelsToUse.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+UAC.GLmodel.name().toUpperCase())); + } + } + // decide whether we are currently processing SNPs, indels, neither, or both private List getGLModelsToUse(final RefMetaDataTracker tracker, final ReferenceContext refContext, final AlignmentContext rawContext) { - final List models = new ArrayList(2); - String modelPrefix = ""; - if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) - modelPrefix = UAC.GLmodel.name().toUpperCase().replaceAll("BOTH",""); + if ( UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) + return modelsToUse; - if (!UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) - modelPrefix = GPSTRING + modelPrefix; + // if we're genotyping given alleles then we need to choose the model corresponding to the variant type requested + final List GGAmodel = new ArrayList(1); + final VariantContext vcInput = getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles); + if ( vcInput == null ) + return GGAmodel; // no work to be done - // if we're genotyping given alleles and we have a requested SNP at this position, do SNP - if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { - final VariantContext vcInput = getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles); - if ( vcInput == null ) - return models; - - if ( vcInput.isSNP() ) { - // ignore SNPs if the user chose INDEL mode only - if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("SNP") ) - models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP")); - } - else if ( vcInput.isIndel() || vcInput.isMixed() ) { - // ignore INDELs if the user chose SNP mode only - if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("INDEL") ) - models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL")); - } - // No support for other types yet + if ( vcInput.isSNP() ) { + // use the SNP model unless the user chose INDEL mode only + if ( modelsToUse.size() == 2 || modelsToUse.get(0).name().endsWith("SNP") ) + GGAmodel.add(modelsToUse.get(0)); } - else { - if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) { - models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP")); - models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL")); - } - else { - models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+UAC.GLmodel.name().toUpperCase())); - } + else if ( vcInput.isIndel() || vcInput.isMixed() ) { + // use the INDEL model unless the user chose SNP mode only + if ( modelsToUse.size() == 2 ) + GGAmodel.add(modelsToUse.get(1)); + else if ( modelsToUse.get(0).name().endsWith("INDEL") ) + GGAmodel.add(modelsToUse.get(0)); } + // No support for other types yet - return models; + return GGAmodel; } public static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double theta) {