From 92676c63cab236436e60daf4954ebc8bdbd459d1 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 22 Mar 2012 12:13:59 -0400 Subject: [PATCH 01/11] Make constructor of IndelGenotypeLikelihoodsCalculationModel public so it can be used in unit tests --- .../genotyper/IndelGenotypeLikelihoodsCalculationModel.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 1b73ef1d7..f6a5874cd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -79,7 +79,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } - protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { + public IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { super(UAC, logger); pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY, UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION); From f198cec5e2085c1a3acac5e785e46dfc06678e28 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 22 Mar 2012 15:46:39 -0400 Subject: [PATCH 02/11] Temp commit: new structure for pool caller, now all work is in the same framework as in UG. There's a new genotype calculation model, PoolGenotypeCalculationModel, that does all the work and plugs into UnifiedGenotyperEngine. A new AF module for pools is upcoming. Old pool caller will be removed once all work is migrated --- build.xml | 4 +- .../AlleleFrequencyCalculationModel.java | 3 +- .../GenotypeLikelihoodsCalculationModel.java | 12 +- ...elGenotypeLikelihoodsCalculationModel.java | 7 +- .../genotyper/UnifiedArgumentCollection.java | 2 +- .../genotyper/UnifiedGenotyperEngine.java | 123 +++++++++++------- .../broadinstitute/sting/utils/MathUtils.java | 23 ++++ 7 files changed, 117 insertions(+), 57 deletions(-) diff --git a/build.xml b/build.xml index d3e25d424..af083bea8 100644 --- a/build.xml +++ b/build.xml @@ -955,8 +955,8 @@ - - + + diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index 9f2403bbf..988b6d1ed 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -41,7 +41,8 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { public enum Model { /** The default model with the best performance in all cases */ - EXACT + EXACT, + POOL } protected int N; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index fb2428258..7527e17b6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -47,9 +47,17 @@ import java.util.Map; */ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { +/* public enum Model { + SNP, + INDEL, + BOTH + } + */ public enum Model { SNP, INDEL, + POOLSNP, + POOLINDEL, BOTH } @@ -60,7 +68,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { GENOTYPE_GIVEN_ALLELES } - protected UnifiedArgumentCollection UAC; + protected final UnifiedArgumentCollection UAC; protected Logger logger; /** @@ -70,7 +78,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { */ protected GenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { if ( logger == null || UAC == null ) throw new ReviewedStingException("Bad arguments"); - this.UAC = UAC.clone(); + this.UAC = UAC; this.logger = logger; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 607f63145..d18f7e5ed 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -94,9 +94,10 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } - private ArrayList computeConsensusAlleles(ReferenceContext ref, + public static ArrayList computeConsensusAlleles(ReferenceContext ref, Map contexts, - AlignmentContextUtils.ReadOrientation contextType, GenomeLocParser locParser) { + AlignmentContextUtils.ReadOrientation contextType, GenomeLocParser locParser, + int minIndelCountForGenotyping, boolean doMultiAllelicCalls) { Allele refAllele = null, altAllele = null; GenomeLoc loc = ref.getLocus(); ArrayList aList = new ArrayList(); @@ -337,7 +338,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } } else { - alleleList = computeConsensusAlleles(ref, contexts, contextType, locParser); + alleleList = computeConsensusAlleles(ref, contexts, contextType, locParser, minIndelCountForGenotyping,doMultiAllelicCalls); if (alleleList.isEmpty()) return null; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 82e411c25..823eafc46 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -38,7 +38,7 @@ public class UnifiedArgumentCollection { * Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus. */ @Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ -- EXACT is the default option, while GRID_SEARCH is also available.", required = false) - public AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT; + protected AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT; /** * The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are: diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 7edcf61a2..1382306c6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -36,14 +36,17 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.*; import java.io.PrintStream; +import java.lang.reflect.Constructor; import java.util.*; public class UnifiedGenotyperEngine { @@ -71,7 +74,7 @@ public class UnifiedGenotyperEngine { private final VariantAnnotatorEngine annotationEngine; // the model used for calculating genotypes - private ThreadLocal> glcm = new ThreadLocal>(); + private ThreadLocal> glcm = new ThreadLocal>(); // the model used for calculating p(non-ref) private ThreadLocal afcm = new ThreadLocal(); @@ -121,7 +124,7 @@ public class UnifiedGenotyperEngine { genomeLocParser = toolkit.getGenomeLocParser(); this.samples = new TreeSet(samples); // note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ - this.UAC = UAC.clone(); + this.UAC = UAC; this.logger = logger; this.verboseWriter = verboseWriter; @@ -219,7 +222,7 @@ public class UnifiedGenotyperEngine { glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC)); } - return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser); + return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser); } private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, AlignmentContext rawContext) { @@ -446,7 +449,7 @@ public class UnifiedGenotyperEngine { if ( !BaseUtils.isRegularBase( refContext.getBase() ) ) return null; - if ( model == GenotypeLikelihoodsCalculationModel.Model.INDEL ) { + if ( model.name().toUpperCase().contains("INDEL")) { if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) { // regular pileup in this case @@ -476,7 +479,7 @@ public class UnifiedGenotyperEngine { // stratify the AlignmentContext and cut by sample stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup); } - } else if ( model == GenotypeLikelihoodsCalculationModel.Model.SNP ) { + } else if ( model.name().toUpperCase().contains("SNP") ) { // stratify the AlignmentContext and cut by sample stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup()); @@ -618,21 +621,27 @@ public class UnifiedGenotyperEngine { return null; if (vcInput.isSNP()) { - if (( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP)) + if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH ) return GenotypeLikelihoodsCalculationModel.Model.SNP; + else if ( UAC.GLmodel.name().toUpperCase().contains("SNP")) + return UAC.GLmodel; else // ignore SNP's if user chose INDEL mode return null; } - else if ((vcInput.isIndel() || vcInput.isMixed()) && (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL)) - return GenotypeLikelihoodsCalculationModel.Model.INDEL; + else if ((vcInput.isIndel() || vcInput.isMixed())) { + if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH ) + return GenotypeLikelihoodsCalculationModel.Model.INDEL; + else if (UAC.GLmodel.name().toUpperCase().contains("INDEL")) + return UAC.GLmodel; + } } else { // todo - this assumes SNP's take priority when BOTH is selected, should do a smarter way once extended events are removed - if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP) + if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH ) return GenotypeLikelihoodsCalculationModel.Model.SNP; - else if (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL) - return GenotypeLikelihoodsCalculationModel.Model.INDEL; + else if (UAC.GLmodel.name().toUpperCase().contains("SNP") || UAC.GLmodel.name().toUpperCase().contains("INDEL")) + return UAC.GLmodel; } } return null; @@ -657,58 +666,76 @@ public class UnifiedGenotyperEngine { } protected double[][] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) { - switch( model ) { - case SNP: - return log10AlleleFrequencyPriorsSNPs; - case INDEL: - return log10AlleleFrequencyPriorsIndels; - default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); - } + if (model.name().toUpperCase().contains("SNP")) + return log10AlleleFrequencyPriorsSNPs; + else if (model.name().toUpperCase().contains("INDEL")) + return log10AlleleFrequencyPriorsIndels; + else + throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); } private static GenotypePriors createGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) { GenotypePriors priors; - switch ( model ) { - case SNP: - // use flat priors for GLs - priors = new DiploidSNPGenotypePriors(); - break; - case INDEL: - // create flat priors for Indels, actual priors will depend on event length to be genotyped - priors = new DiploidIndelGenotypePriors(); - break; - default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); - } + if( model.name().contains("SNP") ) + priors = new DiploidSNPGenotypePriors(); + else if( model.name().contains("INDEL") ) + priors = new DiploidIndelGenotypePriors(); + else throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); + return priors; } protected GenotypePriors getGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) { - switch( model ) { - case SNP: - return genotypePriorsSNPs; - case INDEL: - return genotypePriorsIndels; - default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); - } + if( model.name().contains("SNP") ) + return genotypePriorsSNPs; + if( model.name().contains("INDEL") ) + return genotypePriorsIndels; + else throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); } - private static Map getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) { - Map glcm = new HashMap(); - glcm.put(GenotypeLikelihoodsCalculationModel.Model.SNP, new SNPGenotypeLikelihoodsCalculationModel(UAC, logger)); - glcm.put(GenotypeLikelihoodsCalculationModel.Model.INDEL, new IndelGenotypeLikelihoodsCalculationModel(UAC, logger)); + private static Map getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) { + + + Map glcm = new HashMap(); + // GenotypeLikelihoodsCalculationModel.Model. + List> glmClasses = new PluginManager(GenotypeLikelihoodsCalculationModel.class).getPlugins(); + + for (int i = 0; i < glmClasses.size(); i++) { + Class glmClass = glmClasses.get(i); + String key = glmClass.getSimpleName().replaceAll("GenotypeLikelihoodsCalculationModel","").toUpperCase(); + System.out.println("KEY:"+key+"\t" + glmClass.getSimpleName()); + try { + Object args[] = new Object[]{UAC,logger}; + Constructor c = glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class); + glcm.put(key, (GenotypeLikelihoodsCalculationModel)c.newInstance(args)); + } + catch (Exception e) { + throw new UserException("Incorrect specification for argument glm:"+UAC.GLmodel+e.getMessage()); + } + } + return glcm; } private static AlleleFrequencyCalculationModel getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) { - AlleleFrequencyCalculationModel afcm; - switch ( UAC.AFmodel ) { - case EXACT: - afcm = new ExactAFCalculationModel(UAC, N, logger, verboseWriter); - break; - default: throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel); - } + List> afClasses = new PluginManager(AlleleFrequencyCalculationModel.class).getPlugins(); - return afcm; + for (int i = 0; i < afClasses.size(); i++) { + Class afClass = afClasses.get(i); + String key = afClass.getSimpleName().replace("AFCalculationModel","").toUpperCase(); + if (UAC.AFmodel.name().equalsIgnoreCase(key)) { + try { + Object args[] = new Object[]{UAC,N,logger,verboseWriter}; + Constructor c = afClass.getDeclaredConstructor(UnifiedArgumentCollection.class, int.class, Logger.class, PrintStream.class); + + return (AlleleFrequencyCalculationModel)c.newInstance(args); + } + catch (Exception e) { + throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel); + } + } + } + throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel); } public static VariantContext getVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc, boolean requireSNP, Logger logger, final RodBinding allelesBinding) { diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index bfc326d2d..ad4264d4a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -29,6 +29,7 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.math.BigDecimal; @@ -1047,6 +1048,28 @@ public class MathUtils { } + /** + * Given two log-probability vectors, compute log of vector product of them: + * in Matlab notation, return log(10.*x'*10.^y) + * @param x vector 1 + * @param y vector 2 + * @return a double representing log (dotProd(10.^x,10.^y) + */ + public static double logDotProduct(double [] x, double[] y) { + if (x.length != y.length) + throw new ReviewedStingException("BUG: Vectors of different lengths"); + + double tmpVec[] = new double[x.length]; + + for (int k=0; k < tmpVec.length; k++ ) { + tmpVec[k] = x[k]+y[k]; + } + + return sumLog10(tmpVec); + + + + } public static Object getMedian(List list) { return orderStatisticSearch((int) Math.ceil(list.size() / 2), list); } From 0a56a14d099e90e01a9fafdeb030199c56de1002 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 22 Mar 2012 16:07:07 -0400 Subject: [PATCH 04/11] Build fixes to merge pool calculation models with latest interface changes. Reverted build.xml's private debug changes --- build.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.xml b/build.xml index af083bea8..ce07138b6 100644 --- a/build.xml +++ b/build.xml @@ -955,8 +955,8 @@ - - + + From deb45865599df06e2fb96c177c377f6c982c479c Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Sat, 24 Mar 2012 21:49:43 -0400 Subject: [PATCH 05/11] Next intermediate commit for new pool caller structure: a) Bug fixes in pool GL computation. Now, correct GL's are returned per each pool to the UG engine. Work still needs to be done in redoing interface with exact model. b) Added unit tests for new MathUtils dot product and logDotProduct functions. c) Refactorings of UnifiedGentotyperEngine since N (size of prior/posterior arrays) is no longer necessarily nSamples+1 but, in general, nSamplesPerPool*nPools+1 --- build.xml | 4 ++-- .../gatk/walkers/genotyper/UnifiedGenotyper.java | 2 +- .../walkers/genotyper/UnifiedGenotyperEngine.java | 11 ++++++----- .../org/broadinstitute/sting/utils/MathUtils.java | 4 ++-- .../sting/utils/MathUtilsUnitTest.java | 12 ++++++++++++ 5 files changed, 23 insertions(+), 10 deletions(-) diff --git a/build.xml b/build.xml index ce07138b6..9715071cd 100644 --- a/build.xml +++ b/build.xml @@ -955,8 +955,8 @@ - - + + diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 0eb35d299..820d58837 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -221,7 +221,7 @@ public class UnifiedGenotyper extends LocusWalker headerInfo = getHeaderInfo(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 1382306c6..6af3bbd1e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -115,11 +115,11 @@ public class UnifiedGenotyperEngine { // --------------------------------------------------------------------------------------------------------- @Requires({"toolkit != null", "UAC != null"}) public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) { - this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader())); + this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), 2*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size())); } - @Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"}) - public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set samples) { + @Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","N>0"}) + public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set samples, int N) { this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF; genomeLocParser = toolkit.getGenomeLocParser(); this.samples = new TreeSet(samples); @@ -130,7 +130,8 @@ public class UnifiedGenotyperEngine { this.verboseWriter = verboseWriter; this.annotationEngine = engine; - N = 2 * this.samples.size(); + this.N = N; + log10AlleleFrequencyPriorsSNPs = new double[UAC.MAX_ALTERNATE_ALLELES][N+1]; log10AlleleFrequencyPriorsIndels = new double[UAC.MAX_ALTERNATE_ALLELES][N+1]; computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity); @@ -703,7 +704,7 @@ public class UnifiedGenotyperEngine { for (int i = 0; i < glmClasses.size(); i++) { Class glmClass = glmClasses.get(i); String key = glmClass.getSimpleName().replaceAll("GenotypeLikelihoodsCalculationModel","").toUpperCase(); - System.out.println("KEY:"+key+"\t" + glmClass.getSimpleName()); + //System.out.println("KEY:"+key+"\t" + glmClass.getSimpleName()); try { Object args[] = new Object[]{UAC,logger}; Constructor c = glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class); diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index ad4264d4a..34394ff39 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -1050,7 +1050,7 @@ public class MathUtils { /** * Given two log-probability vectors, compute log of vector product of them: - * in Matlab notation, return log(10.*x'*10.^y) + * in Matlab notation, return log10(10.*x'*10.^y) * @param x vector 1 * @param y vector 2 * @return a double representing log (dotProd(10.^x,10.^y) @@ -1065,7 +1065,7 @@ public class MathUtils { tmpVec[k] = x[k]+y[k]; } - return sumLog10(tmpVec); + return log10sumLog10(tmpVec); diff --git a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java index 482f4da80..9e01eb5ae 100755 --- a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java @@ -284,6 +284,18 @@ public class MathUtilsUnitTest extends BaseTest { Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-1.0, -3.0, -1.0, -2.0}), new double[] {0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211})); } + @Test + public void testDotProduct() { + Assert.assertEquals(MathUtils.dotProduct(new Double[]{-5.0,-3.0,2.0}, new Double[]{6.0,7.0,8.0}),-35.0); + Assert.assertEquals(MathUtils.dotProduct(new Double[]{-5.0}, new Double[]{6.0}),-30.0); + } + + @Test + public void testLogDotProduct() { + Assert.assertEquals(MathUtils.logDotProduct(new double[]{-5.0,-3.0,2.0}, new double[]{6.0,7.0,8.0}),10.0); + Assert.assertEquals(MathUtils.logDotProduct(new double[]{-5.0}, new double[]{6.0}),1.0); + } + /** * Private function used by testNormalizeFromLog10() */ From ce617b2dfc8a24a71c4afaea01b4d885bdbd62a7 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Sun, 25 Mar 2012 10:20:21 -0400 Subject: [PATCH 06/11] Bug fix to previous UnifiedGenotyperEngine refactoring, removed debug code --- build.xml | 4 ++-- .../sting/gatk/walkers/genotyper/UnifiedGenotyper.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/build.xml b/build.xml index 9715071cd..8e9de2272 100644 --- a/build.xml +++ b/build.xml @@ -955,8 +955,8 @@ - - + + diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 820d58837..9a6b3b1dd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -221,7 +221,7 @@ public class UnifiedGenotyper extends LocusWalker headerInfo = getHeaderInfo(); From ed322bd73f51837829e90a463c69998dc366f0d2 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Tue, 27 Mar 2012 15:03:13 -0400 Subject: [PATCH 07/11] Fix again merge issues --- .../genotyper/ExactAFCalculationModel.java | 78 +++++++------------ 1 file changed, 29 insertions(+), 49 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 6c7dc0dcd..891159512 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -43,7 +43,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } public List getLog10PNonRef(final VariantContext vc, - final double[][] log10AlleleFrequencyPriors, + final double[] log10AlleleFrequencyPriors, final AlleleFrequencyCalculationResult result) { GenotypesContext GLs = vc.getGenotypes(); @@ -59,7 +59,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { GLs = UnifiedGenotyperEngine.subsetAlleles(vc, alleles, false); } - //linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); linearExactMultiAllelic(GLs, alleles.size() - 1, log10AlleleFrequencyPriors, result); return alleles; @@ -207,20 +206,9 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } } - // TODO -- remove me public static void linearExactMultiAllelic(final GenotypesContext GLs, final int numAlternateAlleles, - final double[][] log10AlleleFrequencyPriors, - final AlleleFrequencyCalculationResult result, - final boolean foo) { - linearExactMultiAllelic(GLs, numAlternateAlleles, log10AlleleFrequencyPriors, result); - } - - - - public static void linearExactMultiAllelic(final GenotypesContext GLs, - final int numAlternateAlleles, - final double[][] log10AlleleFrequencyPriors, + final double[] log10AlleleFrequencyPriors, final AlleleFrequencyCalculationResult result) { final ArrayList genotypeLikelihoods = getGLs(GLs); @@ -272,7 +260,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final int numChr, final LinkedList ACqueue, final HashMap indexesToACset, - final double[][] log10AlleleFrequencyPriors, + final double[] log10AlleleFrequencyPriors, final AlleleFrequencyCalculationResult result) { //if ( DEBUG ) @@ -360,7 +348,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { private static void computeLofK(final ExactACset set, final ArrayList genotypeLikelihoods, - final double[][] log10AlleleFrequencyPriors, + final double[] log10AlleleFrequencyPriors, final AlleleFrequencyCalculationResult result) { set.log10Likelihoods[0] = 0.0; // the zero case @@ -370,47 +358,39 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { if ( totalK == 0 ) { for ( int j = 1; j < set.log10Likelihoods.length; j++ ) set.log10Likelihoods[j] = set.log10Likelihoods[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX]; + + final double log10Lof0 = set.log10Likelihoods[set.log10Likelihoods.length-1]; + result.setLog10LikelihoodOfAFzero(log10Lof0); + result.setLog10PosteriorOfAFzero(log10Lof0 + log10AlleleFrequencyPriors[0]); + return; } - // k > 0 for at least one k - else { - // the non-AA possible conformations were dealt with by pushes from dependent sets; - // now deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value - for ( int j = 1; j < set.log10Likelihoods.length; j++ ) { - if ( totalK < 2*j-1 ) { - final double[] gl = genotypeLikelihoods.get(j); - final double conformationValue = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX]; - set.log10Likelihoods[j] = MathUtils.approximateLog10SumLog10(set.log10Likelihoods[j], conformationValue); - } + // if we got here, then k > 0 for at least one k. + // the non-AA possible conformations were already dealt with by pushes from dependent sets; + // now deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value + for ( int j = 1; j < set.log10Likelihoods.length; j++ ) { - final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1]; - set.log10Likelihoods[j] = set.log10Likelihoods[j] - logDenominator; + if ( totalK < 2*j-1 ) { + final double[] gl = genotypeLikelihoods.get(j); + final double conformationValue = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX]; + set.log10Likelihoods[j] = MathUtils.approximateLog10SumLog10(set.log10Likelihoods[j], conformationValue); } + + final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1]; + set.log10Likelihoods[j] = set.log10Likelihoods[j] - logDenominator; } - final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1]; + double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1]; - // determine the power of theta to use - int nonRefAlleles = 0; - for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) { - if ( set.ACcounts.getCounts()[i] > 0 ) - nonRefAlleles++; - } - - // for k=0, we don't want to put that value into the likelihoods/posteriors matrix, but instead want to set the value in the results object - if ( nonRefAlleles == 0 ) { - result.log10LikelihoodOfAFzero = log10LofK; - result.log10PosteriorOfAFzero = log10LofK + log10AlleleFrequencyPriors[0][0]; - } else { - // update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs - for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) { - int AC = set.ACcounts.getCounts()[i]; - result.log10AlleleFrequencyLikelihoods[i][AC] = MathUtils.approximateLog10SumLog10(result.log10AlleleFrequencyLikelihoods[i][AC], log10LofK); - - final double prior = log10AlleleFrequencyPriors[nonRefAlleles-1][AC]; - result.log10AlleleFrequencyPosteriors[i][AC] = MathUtils.approximateLog10SumLog10(result.log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior); - } + // update the MLE if necessary + result.updateMLEifNeeded(log10LofK, set.ACcounts.counts); + + // apply the priors over each alternate allele + for ( final int ACcount : set.ACcounts.getCounts() ) { + if ( ACcount > 0 ) + log10LofK += log10AlleleFrequencyPriors[ACcount]; } + result.updateMAPifNeeded(log10LofK, set.ACcounts.counts); } private static void pushData(final ExactACset targetSet, From 8f34412fb81591007d234d127aa7c94643126856 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Tue, 27 Mar 2012 20:59:44 -0400 Subject: [PATCH 08/11] First Pool Caller exact model: silly straightforward math implementation of biallelic pool caller exact likelihood model, no attempt and any smartness or optimization, no support yet for generalized multiallelic form, just hooking up for testing --- .../gatk/walkers/genotyper/ExactAFCalculationModel.java | 4 ++-- .../java/src/org/broadinstitute/sting/utils/MathUtils.java | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 891159512..4bda3282e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -152,7 +152,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { @Override public boolean equals(Object obj) { - return (obj instanceof ExactACcounts) ? Arrays.equals(counts, ((ExactACcounts)obj).counts) : false; + return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts)obj).counts); } @Override @@ -202,7 +202,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } public boolean equals(Object obj) { - return (obj instanceof ExactACset) ? ACcounts.equals(((ExactACset)obj).ACcounts) : false; + return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 072980c27..c4b0165ca 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -49,6 +49,7 @@ public class MathUtils { } public static final double[] log10Cache; + public static final double[] log10FactorialCache; private static final double[] jacobianLogTable; private static final double JACOBIAN_LOG_TABLE_STEP = 0.001; private static final double JACOBIAN_LOG_TABLE_INV_STEP = 1.0 / 0.001; @@ -59,11 +60,14 @@ public class MathUtils { static { log10Cache = new double[LOG10_CACHE_SIZE]; + log10FactorialCache = new double[LOG10_CACHE_SIZE]; jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE]; log10Cache[0] = Double.NEGATIVE_INFINITY; - for (int k = 1; k < LOG10_CACHE_SIZE; k++) + for (int k = 1; k < LOG10_CACHE_SIZE; k++) { log10Cache[k] = Math.log10(k); + log10FactorialCache[k] = log10FactorialCache[k-1] + log10Cache[k]; + } for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) { jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP)); From d2586911a47902a78944c6a0f181be5806e0c04f Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Wed, 28 Mar 2012 08:18:36 -0400 Subject: [PATCH 09/11] Forgot to add tolerance to new MathUtils unit tests --- .../org/broadinstitute/sting/utils/MathUtilsUnitTest.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java index 9e01eb5ae..adc7927a7 100755 --- a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java @@ -286,14 +286,14 @@ public class MathUtilsUnitTest extends BaseTest { @Test public void testDotProduct() { - Assert.assertEquals(MathUtils.dotProduct(new Double[]{-5.0,-3.0,2.0}, new Double[]{6.0,7.0,8.0}),-35.0); - Assert.assertEquals(MathUtils.dotProduct(new Double[]{-5.0}, new Double[]{6.0}),-30.0); + Assert.assertEquals(MathUtils.dotProduct(new Double[]{-5.0,-3.0,2.0}, new Double[]{6.0,7.0,8.0}),-35.0,1e-3); + Assert.assertEquals(MathUtils.dotProduct(new Double[]{-5.0}, new Double[]{6.0}),-30.0,1e-3); } @Test public void testLogDotProduct() { - Assert.assertEquals(MathUtils.logDotProduct(new double[]{-5.0,-3.0,2.0}, new double[]{6.0,7.0,8.0}),10.0); - Assert.assertEquals(MathUtils.logDotProduct(new double[]{-5.0}, new double[]{6.0}),1.0); + Assert.assertEquals(MathUtils.logDotProduct(new double[]{-5.0,-3.0,2.0}, new double[]{6.0,7.0,8.0}),10.0,1e-3); + Assert.assertEquals(MathUtils.logDotProduct(new double[]{-5.0}, new double[]{6.0}),1.0,1e-3); } /** From 63cf7ec7ec1e6b50666c1a72cbc82387bfd62628 Mon Sep 17 00:00:00 2001 From: Roger Zurawicki Date: Tue, 27 Mar 2012 16:22:21 -0400 Subject: [PATCH 10/11] Added more primitives to GATK Report Column Type - The Integer column type now accepts byte and shorts - Updated Unit Tests and added a new testParse() test Signed-off-by: Mauricio Carneiro --- .../sting/gatk/report/GATKReportDataType.java | 46 +++++++++---------- .../sting/gatk/report/GATKReportTable.java | 13 ++---- .../sting/gatk/report/GATKReportUnitTest.java | 33 ++++++------- .../sting/queue/pipeline/PipelineTest.scala | 2 +- 4 files changed, 46 insertions(+), 48 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java index d9bae19c7..6451c5836 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java @@ -48,9 +48,9 @@ public enum GATKReportDataType { Boolean("%[Bb]"), /** - * Used for byte and char value. Will display as a char so use printable values! + * Used for char values. Will display as a char so use printable values! */ - Byte("%[Cc]"), + Character("%[Cc]"), /** * Used for float and double values. Will output a decimal with format %.8f unless otherwise specified. @@ -58,7 +58,7 @@ public enum GATKReportDataType { Decimal("%.*[EeFf]"), /** - * Used for int, and long values. Will display the full number by default. + * Used for int, byte, short, and long values. Will display the full number by default. */ Integer("%[Dd]"), @@ -97,17 +97,26 @@ public enum GATKReportDataType { GATKReportDataType value; if (object instanceof Boolean) { value = GATKReportDataType.Boolean; - } else if (object instanceof Byte || object instanceof Character) { - value = GATKReportDataType.Byte; - } else if (object instanceof Float || object instanceof Double) { + + } else if (object instanceof Character) { + value = GATKReportDataType.Character; + + } else if (object instanceof Float || + object instanceof Double) { value = GATKReportDataType.Decimal; - } else if (object instanceof Integer || object instanceof Long) { + + } else if (object instanceof Integer || + object instanceof Long || + object instanceof Short || + object instanceof Byte ) { value = GATKReportDataType.Integer; + } else if (object instanceof String) { value = GATKReportDataType.String; + } else { value = GATKReportDataType.Unknown; - //throw new ReviewedStingException("GATKReport could not convert the data object into a GATKReportDataType. Acceptable data objects are found in the documentation."); + //throw new UserException("GATKReport could not convert the data object into a GATKReportDataType. Acceptable data objects are found in the documentation."); } return value; } @@ -140,8 +149,8 @@ public enum GATKReportDataType { return 0.0D; case Boolean: return false; - case Byte: - return (byte) 0; + case Character: + return '0'; case Integer: return 0L; case String: @@ -166,16 +175,7 @@ public enum GATKReportDataType { case Boolean: case Integer: return a.toString().equals(b.toString()); - case Byte: - // A mess that checks if the bytes and characters contain the same value - if ((a instanceof Character && b instanceof Character) || - (a instanceof Byte && b instanceof Byte)) - return a.toString().equals(b.toString()); - else if (a instanceof Character && b instanceof Byte) { - return ((Character) a).charValue() == ((Byte) b).byteValue(); - } else if (a instanceof Byte && b instanceof Character) { - return ((Byte) a).byteValue() == ((Character) b).charValue(); - } + case Character: case String: default: return a.equals(b); @@ -201,8 +201,8 @@ public enum GATKReportDataType { return Long.parseLong(str); case String: return str; - case Byte: - return (byte) str.toCharArray()[0]; + case Character: + return str.toCharArray()[0]; default: return str; } @@ -225,7 +225,7 @@ public enum GATKReportDataType { return "%d"; case String: return "%s"; - case Byte: + case Character: return "%c"; case Null: default: diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index e0e3ad1fc..1fe67154e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -254,7 +254,7 @@ public class GATKReportTable { * @param dottedColumnValues Period concatenated values. * @return The first primary key matching the column values or throws an exception. */ - public Object getPrimaryKey(String dottedColumnValues) { + public Object getPrimaryKeyByData(String dottedColumnValues) { Object key = findPrimaryKey(dottedColumnValues); if (key == null) throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + dottedColumnValues); @@ -411,9 +411,8 @@ public class GATKReportTable { if (value == null) value = "null"; - // This code is bs. Why am do I have to conform to bad code - // Below is some ode to convert a string into its appropriate type. - // This is just Roger ranting + // This code below is bs. Why am do I have to conform to bad code + // Below is some code to convert a string into its appropriate type. // If we got a string but the column is not a String type Object newValue = null; @@ -431,7 +430,7 @@ public class GATKReportTable { } catch (Exception e) { } } - if (column.getDataType().equals(GATKReportDataType.Byte) && ((String) value).length() == 1) { + if (column.getDataType().equals(GATKReportDataType.Character) && ((String) value).length() == 1) { newValue = ((String) value).charAt(0); } @@ -816,7 +815,7 @@ public class GATKReportTable { out.println(); } - out.println(); + out.println(); } public int getNumRows() { @@ -877,8 +876,6 @@ public class GATKReportTable { this.set(rowKey, columnKey, toAdd.get(rowKey)); //System.out.printf("Putting row with PK: %s \n", rowKey); } else { - - // TODO we should be able to handle combining data by adding, averaging, etc. this.set(rowKey, columnKey, toAdd.get(rowKey)); System.out.printf("OVERWRITING Row with PK: %s \n", rowKey); diff --git a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java index 90c92189e..ec0db12d3 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java @@ -34,21 +34,22 @@ import java.io.IOException; import java.io.PrintStream; public class GATKReportUnitTest extends BaseTest { - @Test(enabled = false) + @Test public void testParse() throws Exception { String reportPath = validationDataLocation + "exampleGATKReportv1.tbl"; GATKReport report = new GATKReport(reportPath); + Assert.assertEquals(report.getVersion(), GATKReportVersion.V1_0); + Assert.assertEquals(report.getTables().size(), 5); GATKReportTable countVariants = report.getTable("CountVariants"); - //Assert.assertEquals(countVariants.getVersion(), GATKReportVersion.V0_1); - Object countVariantsPK = countVariants.getPrimaryKey("none.eval.none.all"); - Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "100000"); - Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "99872"); + Object countVariantsPK = countVariants.getPrimaryKeyByData("dbsnp.eval.none.all"); + Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "63025520"); + Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "0"); + Assert.assertEquals(countVariants.get(countVariantsPK, "heterozygosity"), 4.73e-06); GATKReportTable validationReport = report.getTable("ValidationReport"); - //Assert.assertEquals(validationReport.getVersion(), GATKReportVersion.V0_1); - Object validationReportPK = countVariants.getPrimaryKey("none.eval.none.known"); - Assert.assertEquals(validationReport.get(validationReportPK, "sensitivity"), "NaN"); + Object validationReportPK = countVariants.getPrimaryKeyByData("dbsnp.eval.none.novel"); + Assert.assertEquals(validationReport.get(validationReportPK, "PPV"), Double.NaN); } @DataProvider(name = "rightAlignValues") @@ -117,15 +118,15 @@ public class GATKReportUnitTest extends BaseTest { report1.addTable("TableName", "Description"); report1.getTable("TableName").addPrimaryKey("id", displayPK); report1.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s"); - report1.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c"); + report1.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c"); report1.getTable("TableName").set(1, "colA", "NotNum"); - report1.getTable("TableName").set(1, "colB", (byte) 64); + report1.getTable("TableName").set(1, "colB", (char) 64); report2 = new GATKReport(); report2.addTable("TableName", "Description"); report2.getTable("TableName").addPrimaryKey("id", displayPK); report2.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s"); - report2.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c"); + report2.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c"); report2.getTable("TableName").set(2, "colA", "df3"); report2.getTable("TableName").set(2, "colB", 'A'); @@ -133,7 +134,7 @@ public class GATKReportUnitTest extends BaseTest { report3.addTable("TableName", "Description"); report3.getTable("TableName").addPrimaryKey("id", displayPK); report3.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s"); - report3.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c"); + report3.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c"); report3.getTable("TableName").set(3, "colA", "df5f"); report3.getTable("TableName").set(3, "colB", 'c'); @@ -146,13 +147,13 @@ public class GATKReportUnitTest extends BaseTest { table.addColumn("SomeInt", GATKReportDataType.Integer.getDefaultValue(), true, "%d"); table.addColumn("SomeFloat", GATKReportDataType.Decimal.getDefaultValue(), true, "%.16E"); table.addColumn("TrueFalse", false, true, "%B"); - table.set("12df", "SomeInt", 34); + table.set("12df", "SomeInt", Byte.MAX_VALUE); table.set("12df", "SomeFloat", 34.0); table.set("12df", "TrueFalse", true); - table.set("5f", "SomeInt", -1); - table.set("5f", "SomeFloat", 0.000003); + table.set("5f", "SomeInt", Short.MAX_VALUE); + table.set("5f", "SomeFloat", Double.MAX_VALUE); table.set("5f", "TrueFalse", false); - table.set("RZ", "SomeInt", 904948230958203958L); + table.set("RZ", "SomeInt", Long.MAX_VALUE); table.set("RZ", "SomeFloat", 535646345.657453464576); table.set("RZ", "TrueFalse", true); diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala index f0feb207b..22f4f6225 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala @@ -136,7 +136,7 @@ object PipelineTest extends BaseTest with Logging { println(" value (min,target,max) table key metric") for (validation <- evalSpec.validations) { val table = report.getTable(validation.table) - val key = table.getPrimaryKey(validation.key) + val key = table.getPrimaryKeyByData(validation.key) val value = String.valueOf(table.get(key, validation.metric)) val inRange = if (value == null) false else validation.inRange(value) val flag = if (!inRange) "*" else " " From bb36cd4adfe0110d393e7c890c38877805930aa4 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 27 Mar 2012 22:51:21 -0400 Subject: [PATCH 11/11] Quick fixes to BQSRGatherer and GATKReportTable * when gathering, be aware that some keys will be missing from some tables. * when a gatktable has no elements, it should still output the header so we know it had no records --- .../org/broadinstitute/sting/gatk/report/GATKReport.java | 7 ++----- .../sting/gatk/walkers/bqsr/RecalibrationReport.java | 7 +++++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index 8fbfa96e9..f2291e5ec 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -177,11 +177,8 @@ public class GATKReport { */ public void print(PrintStream out) { out.println(GATKREPORT_HEADER_PREFIX + getVersion().toString() + SEPARATOR + getTables().size()); - for (GATKReportTable table : tables.values()) { - if (table.getNumRows() > 0) { - table.write(out); - } - } + for (GATKReportTable table : tables.values()) + table.write(out); } public Collection getTables() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java index ce00240b8..897e1645d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java @@ -94,8 +94,11 @@ public class RecalibrationReport { BitSet key = entry.getKey(); RecalDatum otherDatum = entry.getValue(); RecalDatum thisDatum = thisTable.get(key); - thisDatum.increment(otherDatum); // add the two datum objects into 'this' - thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it + if (thisDatum == null) + thisDatum = otherDatum; // sometimes the datum in other won't be present in 'this'. So just assign it! + else + thisDatum.increment(otherDatum); // add the two datum objects into 'this' + thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it } } }