Temp commit: new structure for pool caller, now all work is in the same framework as in UG. There's a new genotype calculation model, PoolGenotypeCalculationModel, that does all the work and plugs into UnifiedGenotyperEngine. A new AF module for pools is upcoming. Old pool caller will be removed once all work is migrated

This commit is contained in:
Guillermo del Angel 2012-03-22 15:46:39 -04:00
parent b02ef95bcf
commit f198cec5e2
7 changed files with 117 additions and 57 deletions

View File

@ -955,8 +955,8 @@
<jvmarg value="-Dpipeline.run=${pipeline.run}" /> <jvmarg value="-Dpipeline.run=${pipeline.run}" />
<jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" /> <jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
<jvmarg line="${cofoja.jvm.args}"/> <jvmarg line="${cofoja.jvm.args}"/>
<!-- <jvmarg value="-Xdebug"/> --> <jvmarg value="-Xdebug"/>
<!-- <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"/> --> <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5678"/>
<classfileset dir="${java.public.test.classes}" includes="**/@{testtype}.class"/> <classfileset dir="${java.public.test.classes}" includes="**/@{testtype}.class"/>
<classfileset dir="${java.private.test.classes}" erroronmissingdir="false"> <classfileset dir="${java.private.test.classes}" erroronmissingdir="false">

View File

@ -41,7 +41,8 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
public enum Model { public enum Model {
/** The default model with the best performance in all cases */ /** The default model with the best performance in all cases */
EXACT EXACT,
POOL
} }
protected int N; protected int N;

View File

@ -47,9 +47,17 @@ import java.util.Map;
*/ */
public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
/* public enum Model {
SNP,
INDEL,
BOTH
}
*/
public enum Model { public enum Model {
SNP, SNP,
INDEL, INDEL,
POOLSNP,
POOLINDEL,
BOTH BOTH
} }
@ -60,7 +68,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
GENOTYPE_GIVEN_ALLELES GENOTYPE_GIVEN_ALLELES
} }
protected UnifiedArgumentCollection UAC; protected final UnifiedArgumentCollection UAC;
protected Logger logger; protected Logger logger;
/** /**
@ -70,7 +78,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
*/ */
protected GenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { protected GenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
if ( logger == null || UAC == null ) throw new ReviewedStingException("Bad arguments"); if ( logger == null || UAC == null ) throw new ReviewedStingException("Bad arguments");
this.UAC = UAC.clone(); this.UAC = UAC;
this.logger = logger; this.logger = logger;
} }

View File

@ -94,9 +94,10 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
} }
private ArrayList<Allele> computeConsensusAlleles(ReferenceContext ref, public static ArrayList<Allele> computeConsensusAlleles(ReferenceContext ref,
Map<String, AlignmentContext> contexts, Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType, GenomeLocParser locParser) { AlignmentContextUtils.ReadOrientation contextType, GenomeLocParser locParser,
int minIndelCountForGenotyping, boolean doMultiAllelicCalls) {
Allele refAllele = null, altAllele = null; Allele refAllele = null, altAllele = null;
GenomeLoc loc = ref.getLocus(); GenomeLoc loc = ref.getLocus();
ArrayList<Allele> aList = new ArrayList<Allele>(); ArrayList<Allele> aList = new ArrayList<Allele>();
@ -337,7 +338,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
} }
} else { } else {
alleleList = computeConsensusAlleles(ref, contexts, contextType, locParser); alleleList = computeConsensusAlleles(ref, contexts, contextType, locParser, minIndelCountForGenotyping,doMultiAllelicCalls);
if (alleleList.isEmpty()) if (alleleList.isEmpty())
return null; return null;
} }

View File

@ -38,7 +38,7 @@ public class UnifiedArgumentCollection {
* Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus. * Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus.
*/ */
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ -- EXACT is the default option, while GRID_SEARCH is also available.", required = false) @Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ -- EXACT is the default option, while GRID_SEARCH is also available.", required = false)
public AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT; protected AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
/** /**
* The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are: * The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are:

View File

@ -36,14 +36,17 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.PrintStream; import java.io.PrintStream;
import java.lang.reflect.Constructor;
import java.util.*; import java.util.*;
public class UnifiedGenotyperEngine { public class UnifiedGenotyperEngine {
@ -71,7 +74,7 @@ public class UnifiedGenotyperEngine {
private final VariantAnnotatorEngine annotationEngine; private final VariantAnnotatorEngine annotationEngine;
// the model used for calculating genotypes // the model used for calculating genotypes
private ThreadLocal<Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel>> glcm = new ThreadLocal<Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel>>(); private ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>> glcm = new ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>>();
// the model used for calculating p(non-ref) // the model used for calculating p(non-ref)
private ThreadLocal<AlleleFrequencyCalculationModel> afcm = new ThreadLocal<AlleleFrequencyCalculationModel>(); private ThreadLocal<AlleleFrequencyCalculationModel> afcm = new ThreadLocal<AlleleFrequencyCalculationModel>();
@ -121,7 +124,7 @@ public class UnifiedGenotyperEngine {
genomeLocParser = toolkit.getGenomeLocParser(); genomeLocParser = toolkit.getGenomeLocParser();
this.samples = new TreeSet<String>(samples); this.samples = new TreeSet<String>(samples);
// note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ // note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ
this.UAC = UAC.clone(); this.UAC = UAC;
this.logger = logger; this.logger = logger;
this.verboseWriter = verboseWriter; this.verboseWriter = verboseWriter;
@ -219,7 +222,7 @@ public class UnifiedGenotyperEngine {
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC)); glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
} }
return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser); return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
} }
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) { private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
@ -446,7 +449,7 @@ public class UnifiedGenotyperEngine {
if ( !BaseUtils.isRegularBase( refContext.getBase() ) ) if ( !BaseUtils.isRegularBase( refContext.getBase() ) )
return null; return null;
if ( model == GenotypeLikelihoodsCalculationModel.Model.INDEL ) { if ( model.name().toUpperCase().contains("INDEL")) {
if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) { if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
// regular pileup in this case // regular pileup in this case
@ -476,7 +479,7 @@ public class UnifiedGenotyperEngine {
// stratify the AlignmentContext and cut by sample // stratify the AlignmentContext and cut by sample
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup); stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
} }
} else if ( model == GenotypeLikelihoodsCalculationModel.Model.SNP ) { } else if ( model.name().toUpperCase().contains("SNP") ) {
// stratify the AlignmentContext and cut by sample // stratify the AlignmentContext and cut by sample
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup()); stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup());
@ -618,21 +621,27 @@ public class UnifiedGenotyperEngine {
return null; return null;
if (vcInput.isSNP()) { if (vcInput.isSNP()) {
if (( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP)) if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
return GenotypeLikelihoodsCalculationModel.Model.SNP; return GenotypeLikelihoodsCalculationModel.Model.SNP;
else if ( UAC.GLmodel.name().toUpperCase().contains("SNP"))
return UAC.GLmodel;
else else
// ignore SNP's if user chose INDEL mode // ignore SNP's if user chose INDEL mode
return null; return null;
} }
else if ((vcInput.isIndel() || vcInput.isMixed()) && (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL)) else if ((vcInput.isIndel() || vcInput.isMixed())) {
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
return GenotypeLikelihoodsCalculationModel.Model.INDEL; return GenotypeLikelihoodsCalculationModel.Model.INDEL;
else if (UAC.GLmodel.name().toUpperCase().contains("INDEL"))
return UAC.GLmodel;
}
} }
else { else {
// todo - this assumes SNP's take priority when BOTH is selected, should do a smarter way once extended events are removed // todo - this assumes SNP's take priority when BOTH is selected, should do a smarter way once extended events are removed
if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP) if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
return GenotypeLikelihoodsCalculationModel.Model.SNP; return GenotypeLikelihoodsCalculationModel.Model.SNP;
else if (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL) else if (UAC.GLmodel.name().toUpperCase().contains("SNP") || UAC.GLmodel.name().toUpperCase().contains("INDEL"))
return GenotypeLikelihoodsCalculationModel.Model.INDEL; return UAC.GLmodel;
} }
} }
return null; return null;
@ -657,58 +666,76 @@ public class UnifiedGenotyperEngine {
} }
protected double[][] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) { protected double[][] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
switch( model ) { if (model.name().toUpperCase().contains("SNP"))
case SNP:
return log10AlleleFrequencyPriorsSNPs; return log10AlleleFrequencyPriorsSNPs;
case INDEL: else if (model.name().toUpperCase().contains("INDEL"))
return log10AlleleFrequencyPriorsIndels; return log10AlleleFrequencyPriorsIndels;
default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); else
} throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
} }
private static GenotypePriors createGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) { private static GenotypePriors createGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
GenotypePriors priors; GenotypePriors priors;
switch ( model ) { if( model.name().contains("SNP") )
case SNP:
// use flat priors for GLs
priors = new DiploidSNPGenotypePriors(); priors = new DiploidSNPGenotypePriors();
break; else if( model.name().contains("INDEL") )
case INDEL:
// create flat priors for Indels, actual priors will depend on event length to be genotyped
priors = new DiploidIndelGenotypePriors(); priors = new DiploidIndelGenotypePriors();
break; else throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
}
return priors; return priors;
} }
protected GenotypePriors getGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) { protected GenotypePriors getGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
switch( model ) { if( model.name().contains("SNP") )
case SNP:
return genotypePriorsSNPs; return genotypePriorsSNPs;
case INDEL: if( model.name().contains("INDEL") )
return genotypePriorsIndels; return genotypePriorsIndels;
default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); else throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
}
private static Map<String,GenotypeLikelihoodsCalculationModel> getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) {
Map<String, GenotypeLikelihoodsCalculationModel> glcm = new HashMap<String, GenotypeLikelihoodsCalculationModel>();
// GenotypeLikelihoodsCalculationModel.Model.
List<Class<? extends GenotypeLikelihoodsCalculationModel>> glmClasses = new PluginManager<GenotypeLikelihoodsCalculationModel>(GenotypeLikelihoodsCalculationModel.class).getPlugins();
for (int i = 0; i < glmClasses.size(); i++) {
Class<? extends GenotypeLikelihoodsCalculationModel> glmClass = glmClasses.get(i);
String key = glmClass.getSimpleName().replaceAll("GenotypeLikelihoodsCalculationModel","").toUpperCase();
System.out.println("KEY:"+key+"\t" + glmClass.getSimpleName());
try {
Object args[] = new Object[]{UAC,logger};
Constructor c = glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class);
glcm.put(key, (GenotypeLikelihoodsCalculationModel)c.newInstance(args));
}
catch (Exception e) {
throw new UserException("Incorrect specification for argument glm:"+UAC.GLmodel+e.getMessage());
} }
} }
private static Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel> getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) {
Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel> glcm = new HashMap<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel>();
glcm.put(GenotypeLikelihoodsCalculationModel.Model.SNP, new SNPGenotypeLikelihoodsCalculationModel(UAC, logger));
glcm.put(GenotypeLikelihoodsCalculationModel.Model.INDEL, new IndelGenotypeLikelihoodsCalculationModel(UAC, logger));
return glcm; return glcm;
} }
private static AlleleFrequencyCalculationModel getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) { private static AlleleFrequencyCalculationModel getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) {
AlleleFrequencyCalculationModel afcm; List<Class<? extends AlleleFrequencyCalculationModel>> afClasses = new PluginManager<AlleleFrequencyCalculationModel>(AlleleFrequencyCalculationModel.class).getPlugins();
switch ( UAC.AFmodel ) {
case EXACT:
afcm = new ExactAFCalculationModel(UAC, N, logger, verboseWriter);
break;
default: throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel);
}
return afcm; for (int i = 0; i < afClasses.size(); i++) {
Class<? extends AlleleFrequencyCalculationModel> afClass = afClasses.get(i);
String key = afClass.getSimpleName().replace("AFCalculationModel","").toUpperCase();
if (UAC.AFmodel.name().equalsIgnoreCase(key)) {
try {
Object args[] = new Object[]{UAC,N,logger,verboseWriter};
Constructor c = afClass.getDeclaredConstructor(UnifiedArgumentCollection.class, int.class, Logger.class, PrintStream.class);
return (AlleleFrequencyCalculationModel)c.newInstance(args);
}
catch (Exception e) {
throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel);
}
}
}
throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel);
} }
public static VariantContext getVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc, boolean requireSNP, Logger logger, final RodBinding<VariantContext> allelesBinding) { public static VariantContext getVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc, boolean requireSNP, Logger logger, final RodBinding<VariantContext> allelesBinding) {

View File

@ -29,6 +29,7 @@ import com.google.java.contract.Ensures;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import java.math.BigDecimal; import java.math.BigDecimal;
@ -1047,6 +1048,28 @@ public class MathUtils {
} }
/**
* Given two log-probability vectors, compute log of vector product of them:
* in Matlab notation, return log(10.*x'*10.^y)
* @param x vector 1
* @param y vector 2
* @return a double representing log (dotProd(10.^x,10.^y)
*/
public static double logDotProduct(double [] x, double[] y) {
if (x.length != y.length)
throw new ReviewedStingException("BUG: Vectors of different lengths");
double tmpVec[] = new double[x.length];
for (int k=0; k < tmpVec.length; k++ ) {
tmpVec[k] = x[k]+y[k];
}
return sumLog10(tmpVec);
}
public static Object getMedian(List<Comparable> list) { public static Object getMedian(List<Comparable> list) {
return orderStatisticSearch((int) Math.ceil(list.size() / 2), list); return orderStatisticSearch((int) Math.ceil(list.size() / 2), list);
} }