Major refactorings/optimizations of pool caller, output still bit-true to older version: a) Move DEFAULT_PLOIDY from UnifiedGenotyperEngine to VariantContextUtils. b) Optimize iteration through all possible allele combinations. c) Don't store log PL's in hashmap from allele conformations to double, it was too slow. Things can still be optimized much more down the line if needed. d) Remove remaining traces of genotype priors.

This commit is contained in:
Guillermo del Angel 2012-04-09 14:53:05 -04:00
parent 14f6b9cd16
commit 550179a1f7
4 changed files with 10 additions and 6 deletions

View File

@ -40,6 +40,7 @@ import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import java.io.PrintStream;
import java.util.*;
@ -216,7 +217,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, UnifiedGenotyperEngine.DEFAULT_PLOIDY);
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, VariantContextUtils.DEFAULT_PLOIDY);
// initialize the header
Set<VCFHeaderLine> headerInfo = getHeaderInfo();

View File

@ -50,8 +50,6 @@ import java.util.*;
public class UnifiedGenotyperEngine {
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
public static final int DEFAULT_PLOIDY = 2;
public enum OUTPUT_MODE {
/** produces calls only at variant sites */
@ -111,7 +109,7 @@ public class UnifiedGenotyperEngine {
// ---------------------------------------------------------------------------------------------------------
@Requires({"toolkit != null", "UAC != null"})
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), DEFAULT_PLOIDY*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size()));
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), VariantContextUtils.DEFAULT_PLOIDY*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size()));
}
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","ploidy>0"})

View File

@ -278,6 +278,10 @@ public class GenotypeLikelihoods {
public static int calculateNumLikelihoods(final int numAlleles, final int ploidy) {
// fast, closed form solution for diploid samples (most common use case)
if (ploidy==2)
return numAlleles*(numAlleles+1)/2;
if (numAlleles == 1)
return 1;
else if (ploidy == 1)

View File

@ -30,7 +30,6 @@ import org.apache.commons.jexl2.JexlEngine;
import org.apache.log4j.Logger;
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.codecs.vcf.AbstractVCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
@ -48,6 +47,8 @@ public class VariantContextUtils {
public final static String MERGE_FILTER_PREFIX = "filterIn";
final public static JexlEngine engine = new JexlEngine();
public static final int DEFAULT_PLOIDY = 2;
static {
engine.setSilent(false); // will throw errors now for selects that don't evaluate properly
engine.setLenient(false);
@ -1123,7 +1124,7 @@ public class VariantContextUtils {
}
// calculateNumLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2
final int numLikelihoods = GenotypeLikelihoods.calculateNumLikelihoods(1+numOriginalAltAlleles, UnifiedGenotyperEngine.DEFAULT_PLOIDY);
final int numLikelihoods = GenotypeLikelihoods.calculateNumLikelihoods(1+numOriginalAltAlleles, DEFAULT_PLOIDY);
for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
// consider this entry only if both of the alleles are good