diff --git a/build.xml b/build.xml index d3e25d424..af083bea8 100644 --- a/build.xml +++ b/build.xml @@ -955,8 +955,8 @@ - - + + diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index 9f2403bbf..988b6d1ed 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -41,7 +41,8 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { public enum Model { /** The default model with the best performance in all cases */ - EXACT + EXACT, + POOL } protected int N; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index fb2428258..7527e17b6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -47,9 +47,17 @@ import java.util.Map; */ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { +/* public enum Model { + SNP, + INDEL, + BOTH + } + */ public enum Model { SNP, INDEL, + POOLSNP, + POOLINDEL, BOTH } @@ -60,7 +68,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { GENOTYPE_GIVEN_ALLELES } - protected UnifiedArgumentCollection UAC; + protected final UnifiedArgumentCollection UAC; protected Logger logger; /** @@ -70,7 +78,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { */ protected GenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { if ( logger == null || UAC == null ) throw new ReviewedStingException("Bad arguments"); - this.UAC = UAC.clone(); + this.UAC = UAC; this.logger = logger; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 607f63145..d18f7e5ed 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -94,9 +94,10 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } - private ArrayList computeConsensusAlleles(ReferenceContext ref, + public static ArrayList computeConsensusAlleles(ReferenceContext ref, Map contexts, - AlignmentContextUtils.ReadOrientation contextType, GenomeLocParser locParser) { + AlignmentContextUtils.ReadOrientation contextType, GenomeLocParser locParser, + int minIndelCountForGenotyping, boolean doMultiAllelicCalls) { Allele refAllele = null, altAllele = null; GenomeLoc loc = ref.getLocus(); ArrayList aList = new ArrayList(); @@ -337,7 +338,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } } else { - alleleList = computeConsensusAlleles(ref, contexts, contextType, locParser); + alleleList = computeConsensusAlleles(ref, contexts, contextType, locParser, minIndelCountForGenotyping,doMultiAllelicCalls); if (alleleList.isEmpty()) return null; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 82e411c25..823eafc46 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -38,7 +38,7 @@ public class UnifiedArgumentCollection { * Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus. */ @Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ -- EXACT is the default option, while GRID_SEARCH is also available.", required = false) - public AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT; + protected AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT; /** * The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are: diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 7edcf61a2..1382306c6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -36,14 +36,17 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.*; import java.io.PrintStream; +import java.lang.reflect.Constructor; import java.util.*; public class UnifiedGenotyperEngine { @@ -71,7 +74,7 @@ public class UnifiedGenotyperEngine { private final VariantAnnotatorEngine annotationEngine; // the model used for calculating genotypes - private ThreadLocal> glcm = new ThreadLocal>(); + private ThreadLocal> glcm = new ThreadLocal>(); // the model used for calculating p(non-ref) private ThreadLocal afcm = new ThreadLocal(); @@ -121,7 +124,7 @@ public class UnifiedGenotyperEngine { genomeLocParser = toolkit.getGenomeLocParser(); this.samples = new TreeSet(samples); // note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ - this.UAC = UAC.clone(); + this.UAC = UAC; this.logger = logger; this.verboseWriter = verboseWriter; @@ -219,7 +222,7 @@ public class UnifiedGenotyperEngine { glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC)); } - return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser); + return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser); } private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, AlignmentContext rawContext) { @@ -446,7 +449,7 @@ public class UnifiedGenotyperEngine { if ( !BaseUtils.isRegularBase( refContext.getBase() ) ) return null; - if ( model == GenotypeLikelihoodsCalculationModel.Model.INDEL ) { + if ( model.name().toUpperCase().contains("INDEL")) { if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) { // regular pileup in this case @@ -476,7 +479,7 @@ public class UnifiedGenotyperEngine { // stratify the AlignmentContext and cut by sample stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup); } - } else if ( model == GenotypeLikelihoodsCalculationModel.Model.SNP ) { + } else if ( model.name().toUpperCase().contains("SNP") ) { // stratify the AlignmentContext and cut by sample stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup()); @@ -618,21 +621,27 @@ public class UnifiedGenotyperEngine { return null; if (vcInput.isSNP()) { - if (( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP)) + if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH ) return GenotypeLikelihoodsCalculationModel.Model.SNP; + else if ( UAC.GLmodel.name().toUpperCase().contains("SNP")) + return UAC.GLmodel; else // ignore SNP's if user chose INDEL mode return null; } - else if ((vcInput.isIndel() || vcInput.isMixed()) && (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL)) - return GenotypeLikelihoodsCalculationModel.Model.INDEL; + else if ((vcInput.isIndel() || vcInput.isMixed())) { + if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH ) + return GenotypeLikelihoodsCalculationModel.Model.INDEL; + else if (UAC.GLmodel.name().toUpperCase().contains("INDEL")) + return UAC.GLmodel; + } } else { // todo - this assumes SNP's take priority when BOTH is selected, should do a smarter way once extended events are removed - if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP) + if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH ) return GenotypeLikelihoodsCalculationModel.Model.SNP; - else if (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL) - return GenotypeLikelihoodsCalculationModel.Model.INDEL; + else if (UAC.GLmodel.name().toUpperCase().contains("SNP") || UAC.GLmodel.name().toUpperCase().contains("INDEL")) + return UAC.GLmodel; } } return null; @@ -657,58 +666,76 @@ public class UnifiedGenotyperEngine { } protected double[][] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) { - switch( model ) { - case SNP: - return log10AlleleFrequencyPriorsSNPs; - case INDEL: - return log10AlleleFrequencyPriorsIndels; - default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); - } + if (model.name().toUpperCase().contains("SNP")) + return log10AlleleFrequencyPriorsSNPs; + else if (model.name().toUpperCase().contains("INDEL")) + return log10AlleleFrequencyPriorsIndels; + else + throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); } private static GenotypePriors createGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) { GenotypePriors priors; - switch ( model ) { - case SNP: - // use flat priors for GLs - priors = new DiploidSNPGenotypePriors(); - break; - case INDEL: - // create flat priors for Indels, actual priors will depend on event length to be genotyped - priors = new DiploidIndelGenotypePriors(); - break; - default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); - } + if( model.name().contains("SNP") ) + priors = new DiploidSNPGenotypePriors(); + else if( model.name().contains("INDEL") ) + priors = new DiploidIndelGenotypePriors(); + else throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); + return priors; } protected GenotypePriors getGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) { - switch( model ) { - case SNP: - return genotypePriorsSNPs; - case INDEL: - return genotypePriorsIndels; - default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); - } + if( model.name().contains("SNP") ) + return genotypePriorsSNPs; + if( model.name().contains("INDEL") ) + return genotypePriorsIndels; + else throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); } - private static Map getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) { - Map glcm = new HashMap(); - glcm.put(GenotypeLikelihoodsCalculationModel.Model.SNP, new SNPGenotypeLikelihoodsCalculationModel(UAC, logger)); - glcm.put(GenotypeLikelihoodsCalculationModel.Model.INDEL, new IndelGenotypeLikelihoodsCalculationModel(UAC, logger)); + private static Map getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) { + + + Map glcm = new HashMap(); + // GenotypeLikelihoodsCalculationModel.Model. + List> glmClasses = new PluginManager(GenotypeLikelihoodsCalculationModel.class).getPlugins(); + + for (int i = 0; i < glmClasses.size(); i++) { + Class glmClass = glmClasses.get(i); + String key = glmClass.getSimpleName().replaceAll("GenotypeLikelihoodsCalculationModel","").toUpperCase(); + System.out.println("KEY:"+key+"\t" + glmClass.getSimpleName()); + try { + Object args[] = new Object[]{UAC,logger}; + Constructor c = glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class); + glcm.put(key, (GenotypeLikelihoodsCalculationModel)c.newInstance(args)); + } + catch (Exception e) { + throw new UserException("Incorrect specification for argument glm:"+UAC.GLmodel+e.getMessage()); + } + } + return glcm; } private static AlleleFrequencyCalculationModel getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) { - AlleleFrequencyCalculationModel afcm; - switch ( UAC.AFmodel ) { - case EXACT: - afcm = new ExactAFCalculationModel(UAC, N, logger, verboseWriter); - break; - default: throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel); - } + List> afClasses = new PluginManager(AlleleFrequencyCalculationModel.class).getPlugins(); - return afcm; + for (int i = 0; i < afClasses.size(); i++) { + Class afClass = afClasses.get(i); + String key = afClass.getSimpleName().replace("AFCalculationModel","").toUpperCase(); + if (UAC.AFmodel.name().equalsIgnoreCase(key)) { + try { + Object args[] = new Object[]{UAC,N,logger,verboseWriter}; + Constructor c = afClass.getDeclaredConstructor(UnifiedArgumentCollection.class, int.class, Logger.class, PrintStream.class); + + return (AlleleFrequencyCalculationModel)c.newInstance(args); + } + catch (Exception e) { + throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel); + } + } + } + throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel); } public static VariantContext getVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc, boolean requireSNP, Logger logger, final RodBinding allelesBinding) { diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index bfc326d2d..ad4264d4a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -29,6 +29,7 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.math.BigDecimal; @@ -1047,6 +1048,28 @@ public class MathUtils { } + /** + * Given two log-probability vectors, compute log of vector product of them: + * in Matlab notation, return log(10.*x'*10.^y) + * @param x vector 1 + * @param y vector 2 + * @return a double representing log (dotProd(10.^x,10.^y) + */ + public static double logDotProduct(double [] x, double[] y) { + if (x.length != y.length) + throw new ReviewedStingException("BUG: Vectors of different lengths"); + + double tmpVec[] = new double[x.length]; + + for (int k=0; k < tmpVec.length; k++ ) { + tmpVec[k] = x[k]+y[k]; + } + + return sumLog10(tmpVec); + + + + } public static Object getMedian(List list) { return orderStatisticSearch((int) Math.ceil(list.size() / 2), list); }