Temp commit: new structure for pool caller, now all work is in the same framework as in UG. There's a new genotype calculation model, PoolGenotypeCalculationModel, that does all the work and plugs into UnifiedGenotyperEngine. A new AF module for pools is upcoming. Old pool caller will be removed once all work is migrated
This commit is contained in:
parent
b02ef95bcf
commit
f198cec5e2
|
|
@ -955,8 +955,8 @@
|
|||
<jvmarg value="-Dpipeline.run=${pipeline.run}" />
|
||||
<jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
|
||||
<jvmarg line="${cofoja.jvm.args}"/>
|
||||
<!-- <jvmarg value="-Xdebug"/> -->
|
||||
<!-- <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"/> -->
|
||||
<jvmarg value="-Xdebug"/>
|
||||
<jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5678"/>
|
||||
|
||||
<classfileset dir="${java.public.test.classes}" includes="**/@{testtype}.class"/>
|
||||
<classfileset dir="${java.private.test.classes}" erroronmissingdir="false">
|
||||
|
|
|
|||
|
|
@ -41,7 +41,8 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
|||
|
||||
public enum Model {
|
||||
/** The default model with the best performance in all cases */
|
||||
EXACT
|
||||
EXACT,
|
||||
POOL
|
||||
}
|
||||
|
||||
protected int N;
|
||||
|
|
|
|||
|
|
@ -47,9 +47,17 @@ import java.util.Map;
|
|||
*/
|
||||
public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
||||
|
||||
/* public enum Model {
|
||||
SNP,
|
||||
INDEL,
|
||||
BOTH
|
||||
}
|
||||
*/
|
||||
public enum Model {
|
||||
SNP,
|
||||
INDEL,
|
||||
POOLSNP,
|
||||
POOLINDEL,
|
||||
BOTH
|
||||
}
|
||||
|
||||
|
|
@ -60,7 +68,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
GENOTYPE_GIVEN_ALLELES
|
||||
}
|
||||
|
||||
protected UnifiedArgumentCollection UAC;
|
||||
protected final UnifiedArgumentCollection UAC;
|
||||
protected Logger logger;
|
||||
|
||||
/**
|
||||
|
|
@ -70,7 +78,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
*/
|
||||
protected GenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||
if ( logger == null || UAC == null ) throw new ReviewedStingException("Bad arguments");
|
||||
this.UAC = UAC.clone();
|
||||
this.UAC = UAC;
|
||||
this.logger = logger;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -94,9 +94,10 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
}
|
||||
|
||||
|
||||
private ArrayList<Allele> computeConsensusAlleles(ReferenceContext ref,
|
||||
public static ArrayList<Allele> computeConsensusAlleles(ReferenceContext ref,
|
||||
Map<String, AlignmentContext> contexts,
|
||||
AlignmentContextUtils.ReadOrientation contextType, GenomeLocParser locParser) {
|
||||
AlignmentContextUtils.ReadOrientation contextType, GenomeLocParser locParser,
|
||||
int minIndelCountForGenotyping, boolean doMultiAllelicCalls) {
|
||||
Allele refAllele = null, altAllele = null;
|
||||
GenomeLoc loc = ref.getLocus();
|
||||
ArrayList<Allele> aList = new ArrayList<Allele>();
|
||||
|
|
@ -337,7 +338,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
}
|
||||
|
||||
} else {
|
||||
alleleList = computeConsensusAlleles(ref, contexts, contextType, locParser);
|
||||
alleleList = computeConsensusAlleles(ref, contexts, contextType, locParser, minIndelCountForGenotyping,doMultiAllelicCalls);
|
||||
if (alleleList.isEmpty())
|
||||
return null;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ public class UnifiedArgumentCollection {
|
|||
* Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus.
|
||||
*/
|
||||
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ -- EXACT is the default option, while GRID_SEARCH is also available.", required = false)
|
||||
public AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
|
||||
protected AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
|
||||
|
||||
/**
|
||||
* The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are:
|
||||
|
|
|
|||
|
|
@ -36,14 +36,17 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.util.*;
|
||||
|
||||
public class UnifiedGenotyperEngine {
|
||||
|
|
@ -71,7 +74,7 @@ public class UnifiedGenotyperEngine {
|
|||
private final VariantAnnotatorEngine annotationEngine;
|
||||
|
||||
// the model used for calculating genotypes
|
||||
private ThreadLocal<Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel>> glcm = new ThreadLocal<Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel>>();
|
||||
private ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>> glcm = new ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>>();
|
||||
|
||||
// the model used for calculating p(non-ref)
|
||||
private ThreadLocal<AlleleFrequencyCalculationModel> afcm = new ThreadLocal<AlleleFrequencyCalculationModel>();
|
||||
|
|
@ -121,7 +124,7 @@ public class UnifiedGenotyperEngine {
|
|||
genomeLocParser = toolkit.getGenomeLocParser();
|
||||
this.samples = new TreeSet<String>(samples);
|
||||
// note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ
|
||||
this.UAC = UAC.clone();
|
||||
this.UAC = UAC;
|
||||
|
||||
this.logger = logger;
|
||||
this.verboseWriter = verboseWriter;
|
||||
|
|
@ -219,7 +222,7 @@ public class UnifiedGenotyperEngine {
|
|||
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
|
||||
}
|
||||
|
||||
return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
|
||||
return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
|
||||
}
|
||||
|
||||
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
|
||||
|
|
@ -446,7 +449,7 @@ public class UnifiedGenotyperEngine {
|
|||
if ( !BaseUtils.isRegularBase( refContext.getBase() ) )
|
||||
return null;
|
||||
|
||||
if ( model == GenotypeLikelihoodsCalculationModel.Model.INDEL ) {
|
||||
if ( model.name().toUpperCase().contains("INDEL")) {
|
||||
|
||||
if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
|
||||
// regular pileup in this case
|
||||
|
|
@ -476,7 +479,7 @@ public class UnifiedGenotyperEngine {
|
|||
// stratify the AlignmentContext and cut by sample
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
||||
}
|
||||
} else if ( model == GenotypeLikelihoodsCalculationModel.Model.SNP ) {
|
||||
} else if ( model.name().toUpperCase().contains("SNP") ) {
|
||||
|
||||
// stratify the AlignmentContext and cut by sample
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup());
|
||||
|
|
@ -618,21 +621,27 @@ public class UnifiedGenotyperEngine {
|
|||
return null;
|
||||
|
||||
if (vcInput.isSNP()) {
|
||||
if (( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP))
|
||||
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
|
||||
return GenotypeLikelihoodsCalculationModel.Model.SNP;
|
||||
else if ( UAC.GLmodel.name().toUpperCase().contains("SNP"))
|
||||
return UAC.GLmodel;
|
||||
else
|
||||
// ignore SNP's if user chose INDEL mode
|
||||
return null;
|
||||
}
|
||||
else if ((vcInput.isIndel() || vcInput.isMixed()) && (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL))
|
||||
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||
else if ((vcInput.isIndel() || vcInput.isMixed())) {
|
||||
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
|
||||
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||
else if (UAC.GLmodel.name().toUpperCase().contains("INDEL"))
|
||||
return UAC.GLmodel;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// todo - this assumes SNP's take priority when BOTH is selected, should do a smarter way once extended events are removed
|
||||
if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP)
|
||||
if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
|
||||
return GenotypeLikelihoodsCalculationModel.Model.SNP;
|
||||
else if (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL)
|
||||
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||
else if (UAC.GLmodel.name().toUpperCase().contains("SNP") || UAC.GLmodel.name().toUpperCase().contains("INDEL"))
|
||||
return UAC.GLmodel;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
|
|
@ -657,58 +666,76 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
|
||||
protected double[][] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
|
||||
switch( model ) {
|
||||
case SNP:
|
||||
return log10AlleleFrequencyPriorsSNPs;
|
||||
case INDEL:
|
||||
return log10AlleleFrequencyPriorsIndels;
|
||||
default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
|
||||
}
|
||||
if (model.name().toUpperCase().contains("SNP"))
|
||||
return log10AlleleFrequencyPriorsSNPs;
|
||||
else if (model.name().toUpperCase().contains("INDEL"))
|
||||
return log10AlleleFrequencyPriorsIndels;
|
||||
else
|
||||
throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
|
||||
}
|
||||
|
||||
private static GenotypePriors createGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
|
||||
GenotypePriors priors;
|
||||
switch ( model ) {
|
||||
case SNP:
|
||||
// use flat priors for GLs
|
||||
priors = new DiploidSNPGenotypePriors();
|
||||
break;
|
||||
case INDEL:
|
||||
// create flat priors for Indels, actual priors will depend on event length to be genotyped
|
||||
priors = new DiploidIndelGenotypePriors();
|
||||
break;
|
||||
default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
|
||||
}
|
||||
if( model.name().contains("SNP") )
|
||||
priors = new DiploidSNPGenotypePriors();
|
||||
else if( model.name().contains("INDEL") )
|
||||
priors = new DiploidIndelGenotypePriors();
|
||||
else throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
|
||||
|
||||
return priors;
|
||||
}
|
||||
|
||||
protected GenotypePriors getGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
|
||||
switch( model ) {
|
||||
case SNP:
|
||||
return genotypePriorsSNPs;
|
||||
case INDEL:
|
||||
return genotypePriorsIndels;
|
||||
default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
|
||||
}
|
||||
if( model.name().contains("SNP") )
|
||||
return genotypePriorsSNPs;
|
||||
if( model.name().contains("INDEL") )
|
||||
return genotypePriorsIndels;
|
||||
else throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
|
||||
}
|
||||
|
||||
private static Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel> getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) {
|
||||
Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel> glcm = new HashMap<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel>();
|
||||
glcm.put(GenotypeLikelihoodsCalculationModel.Model.SNP, new SNPGenotypeLikelihoodsCalculationModel(UAC, logger));
|
||||
glcm.put(GenotypeLikelihoodsCalculationModel.Model.INDEL, new IndelGenotypeLikelihoodsCalculationModel(UAC, logger));
|
||||
private static Map<String,GenotypeLikelihoodsCalculationModel> getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) {
|
||||
|
||||
|
||||
Map<String, GenotypeLikelihoodsCalculationModel> glcm = new HashMap<String, GenotypeLikelihoodsCalculationModel>();
|
||||
// GenotypeLikelihoodsCalculationModel.Model.
|
||||
List<Class<? extends GenotypeLikelihoodsCalculationModel>> glmClasses = new PluginManager<GenotypeLikelihoodsCalculationModel>(GenotypeLikelihoodsCalculationModel.class).getPlugins();
|
||||
|
||||
for (int i = 0; i < glmClasses.size(); i++) {
|
||||
Class<? extends GenotypeLikelihoodsCalculationModel> glmClass = glmClasses.get(i);
|
||||
String key = glmClass.getSimpleName().replaceAll("GenotypeLikelihoodsCalculationModel","").toUpperCase();
|
||||
System.out.println("KEY:"+key+"\t" + glmClass.getSimpleName());
|
||||
try {
|
||||
Object args[] = new Object[]{UAC,logger};
|
||||
Constructor c = glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class);
|
||||
glcm.put(key, (GenotypeLikelihoodsCalculationModel)c.newInstance(args));
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new UserException("Incorrect specification for argument glm:"+UAC.GLmodel+e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
return glcm;
|
||||
}
|
||||
|
||||
private static AlleleFrequencyCalculationModel getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) {
|
||||
AlleleFrequencyCalculationModel afcm;
|
||||
switch ( UAC.AFmodel ) {
|
||||
case EXACT:
|
||||
afcm = new ExactAFCalculationModel(UAC, N, logger, verboseWriter);
|
||||
break;
|
||||
default: throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel);
|
||||
}
|
||||
List<Class<? extends AlleleFrequencyCalculationModel>> afClasses = new PluginManager<AlleleFrequencyCalculationModel>(AlleleFrequencyCalculationModel.class).getPlugins();
|
||||
|
||||
return afcm;
|
||||
for (int i = 0; i < afClasses.size(); i++) {
|
||||
Class<? extends AlleleFrequencyCalculationModel> afClass = afClasses.get(i);
|
||||
String key = afClass.getSimpleName().replace("AFCalculationModel","").toUpperCase();
|
||||
if (UAC.AFmodel.name().equalsIgnoreCase(key)) {
|
||||
try {
|
||||
Object args[] = new Object[]{UAC,N,logger,verboseWriter};
|
||||
Constructor c = afClass.getDeclaredConstructor(UnifiedArgumentCollection.class, int.class, Logger.class, PrintStream.class);
|
||||
|
||||
return (AlleleFrequencyCalculationModel)c.newInstance(args);
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel);
|
||||
}
|
||||
}
|
||||
}
|
||||
throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel);
|
||||
}
|
||||
|
||||
public static VariantContext getVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc, boolean requireSNP, Logger logger, final RodBinding<VariantContext> allelesBinding) {
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import com.google.java.contract.Ensures;
|
|||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
|
|
@ -1047,6 +1048,28 @@ public class MathUtils {
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* Given two log-probability vectors, compute log of vector product of them:
|
||||
* in Matlab notation, return log(10.*x'*10.^y)
|
||||
* @param x vector 1
|
||||
* @param y vector 2
|
||||
* @return a double representing log (dotProd(10.^x,10.^y)
|
||||
*/
|
||||
public static double logDotProduct(double [] x, double[] y) {
|
||||
if (x.length != y.length)
|
||||
throw new ReviewedStingException("BUG: Vectors of different lengths");
|
||||
|
||||
double tmpVec[] = new double[x.length];
|
||||
|
||||
for (int k=0; k < tmpVec.length; k++ ) {
|
||||
tmpVec[k] = x[k]+y[k];
|
||||
}
|
||||
|
||||
return sumLog10(tmpVec);
|
||||
|
||||
|
||||
|
||||
}
|
||||
public static Object getMedian(List<Comparable> list) {
|
||||
return orderStatisticSearch((int) Math.ceil(list.size() / 2), list);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue