Fix for GSA-615: UnifiedGenotyperEngine.getGLModelsToUse takes 5% of the runtime of UG, should be optimized away.

This commit is contained in:
Eric Banks 2012-12-12 20:25:45 +00:00
parent 211a6e78ea
commit bba63a3b0e
2 changed files with 42 additions and 36 deletions

View File

@ -59,8 +59,8 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
public enum Model { public enum Model {
SNP, SNP,
INDEL, INDEL,
GeneralPloidySNP, GENERALPLOIDYSNP,
GeneralPloidyINDEL, GENERALPLOIDYINDEL,
BOTH BOTH
} }

View File

@ -52,7 +52,7 @@ import java.util.*;
public class UnifiedGenotyperEngine { public class UnifiedGenotyperEngine {
public static final String LOW_QUAL_FILTER_NAME = "LowQual"; public static final String LOW_QUAL_FILTER_NAME = "LowQual";
private static final String GPSTRING = "GeneralPloidy"; private static final String GPSTRING = "GENERALPLOIDY";
public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA"; public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA";
@ -79,6 +79,7 @@ public class UnifiedGenotyperEngine {
// the model used for calculating genotypes // the model used for calculating genotypes
private ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>> glcm = new ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>>(); private ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>> glcm = new ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>>();
private final List<GenotypeLikelihoodsCalculationModel.Model> modelsToUse = new ArrayList<GenotypeLikelihoodsCalculationModel.Model>(2);
// the model used for calculating p(non-ref) // the model used for calculating p(non-ref)
private ThreadLocal<AFCalc> afcm = new ThreadLocal<AFCalc>(); private ThreadLocal<AFCalc> afcm = new ThreadLocal<AFCalc>();
@ -134,6 +135,8 @@ public class UnifiedGenotyperEngine {
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, UAC.INDEL_HETEROZYGOSITY); computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, UAC.INDEL_HETEROZYGOSITY);
filter.add(LOW_QUAL_FILTER_NAME); filter.add(LOW_QUAL_FILTER_NAME);
determineGLModelsToUse();
} }
/** /**
@ -286,7 +289,7 @@ public class UnifiedGenotyperEngine {
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC)); glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
} }
return glcm.get().get(model.name().toUpperCase()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser, perReadAlleleLikelihoodMap); return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser, perReadAlleleLikelihoodMap);
} }
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) { private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
@ -634,48 +637,51 @@ public class UnifiedGenotyperEngine {
(UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES && QualityUtils.phredScaleErrorRate(PofF) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING); (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES && QualityUtils.phredScaleErrorRate(PofF) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING);
} }
/**
 * Resolves the user-selected genotype likelihoods model ({@code UAC.GLmodel}) into the concrete
 * model(s) to run and appends them to {@code modelsToUse}.
 *
 * The general-ploidy prefix ({@code GPSTRING}) is prepended when the sample ploidy differs from
 * {@code VariantContextUtils.DEFAULT_PLOIDY} and the selected model name does not already contain it.
 * A selection whose name contains "BOTH" expands to two entries, SNP first and then INDEL;
 * any other selection yields a single entry.
 *
 * Note that this method only appends: it never clears {@code modelsToUse}, so calling it more than
 * once would add the same entries again.
 *
 * @throws IllegalArgumentException (from {@code Model.valueOf}) if a composed name is not a Model constant
 */
private void determineGLModelsToUse() {
    final String selectedName = UAC.GLmodel.name();
    final String selectedNameUpper = selectedName.toUpperCase();

    // only add the general-ploidy prefix if the selected model does not already carry it
    final boolean addPloidyPrefix = !selectedName.contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY;
    final String ploidyPrefix = addPloidyPrefix ? GPSTRING : "";

    // a single, explicitly chosen model: resolve it directly
    if ( !selectedNameUpper.contains("BOTH") ) {
        modelsToUse.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(ploidyPrefix + selectedNameUpper));
        return;
    }

    // "BOTH": whatever remains of the name after stripping "BOTH" is kept as part of the prefix,
    // then the selection is expanded into its SNP and INDEL flavours (in that order)
    final String sharedPrefix = ploidyPrefix + selectedNameUpper.replaceAll("BOTH", "");
    final GenotypeLikelihoodsCalculationModel.Model snpModel = GenotypeLikelihoodsCalculationModel.Model.valueOf(sharedPrefix + "SNP");
    modelsToUse.add(snpModel);
    final GenotypeLikelihoodsCalculationModel.Model indelModel = GenotypeLikelihoodsCalculationModel.Model.valueOf(sharedPrefix + "INDEL");
    modelsToUse.add(indelModel);
}
// decide whether we are currently processing SNPs, indels, neither, or both // decide whether we are currently processing SNPs, indels, neither, or both
private List<GenotypeLikelihoodsCalculationModel.Model> getGLModelsToUse(final RefMetaDataTracker tracker, private List<GenotypeLikelihoodsCalculationModel.Model> getGLModelsToUse(final RefMetaDataTracker tracker,
final ReferenceContext refContext, final ReferenceContext refContext,
final AlignmentContext rawContext) { final AlignmentContext rawContext) {
final List<GenotypeLikelihoodsCalculationModel.Model> models = new ArrayList<GenotypeLikelihoodsCalculationModel.Model>(2); if ( UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES )
String modelPrefix = ""; return modelsToUse;
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
modelPrefix = UAC.GLmodel.name().toUpperCase().replaceAll("BOTH","");
if (!UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) // if we're genotyping given alleles then we need to choose the model corresponding to the variant type requested
modelPrefix = GPSTRING + modelPrefix; final List<GenotypeLikelihoodsCalculationModel.Model> GGAmodel = new ArrayList<GenotypeLikelihoodsCalculationModel.Model>(1);
final VariantContext vcInput = getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles);
if ( vcInput == null )
return GGAmodel; // no work to be done
// if we're genotyping given alleles and we have a requested SNP at this position, do SNP if ( vcInput.isSNP() ) {
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { // use the SNP model unless the user chose INDEL mode only
final VariantContext vcInput = getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles); if ( modelsToUse.size() == 2 || modelsToUse.get(0).name().endsWith("SNP") )
if ( vcInput == null ) GGAmodel.add(modelsToUse.get(0));
return models;
if ( vcInput.isSNP() ) {
// ignore SNPs if the user chose INDEL mode only
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("SNP") )
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP"));
}
else if ( vcInput.isIndel() || vcInput.isMixed() ) {
// ignore INDELs if the user chose SNP mode only
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("INDEL") )
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
}
// No support for other types yet
} }
else { else if ( vcInput.isIndel() || vcInput.isMixed() ) {
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) { // use the INDEL model unless the user chose SNP mode only
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP")); if ( modelsToUse.size() == 2 )
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL")); GGAmodel.add(modelsToUse.get(1));
} else if ( modelsToUse.get(0).name().endsWith("INDEL") )
else { GGAmodel.add(modelsToUse.get(0));
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+UAC.GLmodel.name().toUpperCase()));
}
} }
// No support for other types yet
return models; return GGAmodel;
} }
public static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double theta) { public static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double theta) {