diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/CountVariants.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/CountVariants.java index b70ff8ac6..f707e8479 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/CountVariants.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/CountVariants.java @@ -14,14 +14,25 @@ import java.util.Arrays; @Analysis(name = "Count Variants", description = "Counts different classes of variants in the sample") public class CountVariants extends VariantEvaluator { + + // the following fields are in output order: + + // basic counts on various rates found @DataPoint(description = "Number of processed loci") long nProcessedLoci = 0; @DataPoint(description = "Number of called loci") long nCalledLoci = 0; - @DataPoint(description = "Number of variant loci") - long nVariantLoci = 0; @DataPoint(description = "Number of reference loci") long nRefLoci = 0; + @DataPoint(description = "Number of variant loci") + long nVariantLoci = 0; + + // the following two calculations get set in the finalizeEvaluation + @DataPoint(description = "Variants per loci rate") + double variantRate = 0; + @DataPoint(description = "Number of variants per base") + double variantRatePerBp = 0; + @DataPoint(description = "Number of snp loci") long nSNPs = 0; @@ -41,6 +52,20 @@ public class CountVariants extends VariantEvaluator { @DataPoint(description = "Number of hom var loci") long nHomVar = 0; + // calculations that get set in the finalizeEvaluation method + @DataPoint(description = "heterozygosity per locus rate") + double heterozygosity = 0; + @DataPoint(description = "heterozygosity per base pair") + double heterozygosityPerBp = 0; + @DataPoint(description = "heterozygosity to homozygosity ratio") + double hetHomRatio = 0; + @DataPoint(description = "indel rate (insertion count + deletion count)") + double indelRate = 0; + @DataPoint(description 
= "indel rate per base pair") + double indelRatePerBp = 0; + @DataPoint(description = "deletion to insertion ratio") + double deletionInsertionRatio = 0; + public CountVariants(VariantEval2Walker parent) { // don't do anything } @@ -112,39 +137,14 @@ public class CountVariants extends VariantEvaluator { return null; // we don't capture any interesting sites } - public String toString() { - return getName() + ": " + summaryLine(); - } - - private String summaryLine() { - return String.format("%d %d %d %d " + - "%.2e %d " + - "%d %d %d %d " + - "%d %d %d " + - "%.2e %d %.2f " + - "%.2f %d %.2f", - nProcessedLoci, nCalledLoci, nRefLoci, nVariantLoci, - perLocusRate(nVariantLoci), perLocusRInverseRate(nVariantLoci), - nSNPs, nDeletions, nInsertions, nComplex, - nHomRef, nHets, nHomVar, - perLocusRate(nHets), perLocusRInverseRate(nHets), ratio(nHets, nHomVar), - perLocusRate(nDeletions + nInsertions), perLocusRInverseRate(nDeletions + nInsertions), ratio(nDeletions, nInsertions)); - } - - private static List HEADER = - Arrays.asList("nProcessedLoci", "nCalledLoci", "nRefLoci", "nVariantLoci", - "variantRate", "variantRatePerBp", - "nSNPs", "nDeletions", "nInsertions", "nComplex", - "nHomRefGenotypes", "nHetGenotypes", "nHomVarGenotypes", - "heterozygosity", "heterozygosityPerBp", "hetHomRatio", - "indelRate", "indelRatePerBp", "deletionInsertionRatio"); - - // making it a table - public List getTableHeader() { - return HEADER; - } - - public List> getTableRows() { - return Arrays.asList(Arrays.asList(summaryLine().split(" "))); + public void finalizeEvaluation() { + variantRate = perLocusRate(nVariantLoci); + variantRatePerBp = perLocusRInverseRate(nVariantLoci); + heterozygosity = perLocusRate(nHets); + heterozygosityPerBp = perLocusRInverseRate(nHets); + hetHomRatio = ratio(nHets, nHomVar); + indelRate = perLocusRate(nDeletions + nInsertions); + indelRatePerBp = perLocusRInverseRate(nDeletions + nInsertions); + deletionInsertionRatio = ratio(nDeletions, nInsertions); 
} } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/DbSNPPercentage.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/DbSNPPercentage.java index 1a7f2692d..11cb7829e 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/DbSNPPercentage.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/DbSNPPercentage.java @@ -20,17 +20,30 @@ import java.util.Arrays; * This software is supplied without any warranty or guaranteed support whatsoever. Neither * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. */ -@Analysis(name = "dbOverlap", description = "the overlap between DbSNP sites and other SNP tracks") +@Analysis(name = "DbSNP Overlap", description = "the overlap between DbSNP sites and other SNP tracks") public class DbSNPPercentage extends VariantEvaluator { - @DataPoint(name = "DbSNP_count", description = "number of DPSNP sites") + + @DataPoint(name = "DbSNP count", description = "number of DbSNP sites") private long nDBSNPs = 0; - @DataPoint(name = "total_count", description = "number of total snp sites") + + @DataPoint(name = "total count", description = "number of total snp sites") private long nEvalSNPs = 0; - @DataPoint(name = "number_snps_at_dbsnp", description = "number of SNP sites at DPSNP sites") + + @DataPoint(name = "novel snps", description = "number of novel snp sites (eval SNP sites not at DbSNP sites)") + private long novelSites = 0; + + @DataPoint(name = "snps at dbsnp", description = "number of SNP sites at DbSNP sites") private long nSNPsAtdbSNPs = 0; - @DataPoint(name = "number_concordant", description = "number of concordant sites") + + @DataPoint(name = "% eval in comp", description = "percentage of SNP sites at DbSNP sites") + private double dbSNPRate = 0.0; + + @DataPoint(name = "concordant", description = "number of concordant sites") private long nConcordant = 0; + @DataPoint(name = "% concordant", 
description = "the concordance rate") + private double concordantRate = 0.0; + public DbSNPPercentage(VariantEval2Walker parent) { // don't do anything } @@ -43,67 +56,20 @@ public class DbSNPPercentage extends VariantEvaluator { return 2; // we need to see each eval track and each comp track } - public long nDBSNPs() { - return nDBSNPs; - } + public long nNovelSites() { return Math.abs(nEvalSNPs - nSNPsAtdbSNPs); } + public double dbSNPRate() { return rate(nSNPsAtdbSNPs, nEvalSNPs); } + public double concordanceRate() { return rate(nConcordant, nSNPsAtdbSNPs); } - public long nEvalSNPs() { - return nEvalSNPs; - } - - public long nSNPsAtdbSNPs() { - return nSNPsAtdbSNPs; - } - - public long nConcordant() { - return nConcordant; - } - - public long nNovelSites() { - return Math.abs(nEvalSNPs() - nSNPsAtdbSNPs()); - } - - - /** - * What fraction of the evaluated site variants were also found in the db? - * - * @return db rate - */ - public double dbSNPRate() { - return rate(nSNPsAtdbSNPs(), nEvalSNPs()); - } - - public double concordanceRate() { - return rate(nConcordant(), nSNPsAtdbSNPs()); - } - - public String toString() { - return getName() + ": " + summaryLine(); - } - - private String summaryLine() { - return String.format("%d %d %d %d %d %.2f %.2f", - nDBSNPs(), nEvalSNPs(), nSNPsAtdbSNPs(), nConcordant(), nNovelSites(), 100 * dbSNPRate(), 100 * concordanceRate()); - } - - private static List HEADER = - Arrays.asList("n_dbsnps", "n_eval_snps", "n_overlapping_snps", "n_concordant", - "n_novel_snps", "percent_eval_in_comp", "concordance_rate"); - - // making it a table - - public List getTableHeader() { - return HEADER; + public void finalizeEvaluation() { + dbSNPRate = 100 * dbSNPRate(); + concordantRate = 100 * concordanceRate(); + novelSites = nNovelSites(); } public boolean enabled() { return true; } - public List> getTableRows() { - return Arrays.asList(Arrays.asList(summaryLine().split(" "))); - } - /** * Returns true if every allele in eval is also in 
dbsnp * diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/GenotypeConcordance.java index 9c0208021..8b9ee324a 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/GenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/GenotypeConcordance.java @@ -30,54 +30,11 @@ public class GenotypeConcordance extends VariantEvaluator { private HashMap alleleCountStats = new HashMap(); // a mapping from sample to stats - @DataPoint(description = "the concordance statistics for each sample") - private HashMap concordanceStats = null; + @DataPoint(name="samples", description = "the concordance statistics for each sample") + SampleStats sampleStats = null; private static final int MAX_MISSED_VALIDATION_DATA = 10000; - private static final int nGenotypeTypes = Genotype.Type.values().length; - - class SampleStats implements TableType { - - long[][] concordance = new long[nGenotypeTypes][nGenotypeTypes]; - - // TableType methods - - public Object[] getRowKeys() { - return Genotype.Type.values(); - } - - public Object[] getColumnKeys() { - return Genotype.Type.values(); - } - - public String getCell(int x, int y) { - return String.valueOf(concordance[x][y]); - } - - public String getName() { - return "SampleStats"; - } - - public String toString() { - StringBuffer sb = new StringBuffer(); - for (int truth = 0; truth < nGenotypeTypes; truth++) { - // don't print out when truth = no-call - if (truth == Genotype.Type.NO_CALL.ordinal()) - continue; - long total = 0; - for (int called = 0; called < nGenotypeTypes; called++) - total += concordance[truth][called]; - sb.append(String.format("%d %d %.2f ", total, concordance[truth][truth], total == 0 ? 
0.0 : (100.0 * (double) concordance[truth][truth] / (double) total))); - for (int called = 0; called < nGenotypeTypes; called++) { - if (called != truth) - sb.append(String.format("%d ", concordance[truth][called])); - } - } - - return sb.toString(); - } - } class FrequencyStats implements TableType { long nFound = 0; @@ -152,19 +109,7 @@ public class GenotypeConcordance extends VariantEvaluator { } public List> getTableRows() { - ArrayList> rows = new ArrayList>(); - - if (concordanceStats != null) { - for (Map.Entry sample : concordanceStats.entrySet()) - rows.add(Arrays.asList(String.format("%s %s", sample.getKey(), sample.getValue().toString()).split(" "))); - } - - if (alleleCountStats != null) { - for (Map.Entry alleleCount : alleleCountStats.entrySet()) - rows.add(Arrays.asList(String.format("%d %s", alleleCount.getKey(), alleleCount.getValue().toString()).split(" "))); - } - - return rows; + return null; } private boolean warnedAboutValidationData = false; @@ -176,10 +121,10 @@ public class GenotypeConcordance extends VariantEvaluator { if (eval == null && !isValidVC(validation)) return interesting; - if (concordanceStats == null) { + if (sampleStats == null) { if (eval != null) { // initialize the concordance table - createConcordanceTable(eval); + sampleStats = new SampleStats(eval,Genotype.Type.values().length); for (VariantContext vc : missedValidationData) determineStats(null, vc); missedValidationData = null; @@ -208,8 +153,7 @@ public class GenotypeConcordance extends VariantEvaluator { // determine concordance for eval data if (eval != null) { - - for (String sample : eval.getSampleNames()) { + for (String sample : eval.getSampleNames()) { Genotype.Type called = eval.getGenotype(sample).getType(); Genotype.Type truth; @@ -218,24 +162,16 @@ public class GenotypeConcordance extends VariantEvaluator { else truth = validation.getGenotype(sample).getType(); - SampleStats stats = concordanceStats.get(sample); - if (stats == null) - throw new 
StingException("Sample " + sample + " has not been seen in a previous eval; this analysis module assumes that all samples are present in each variant context"); - stats.concordance[truth.ordinal()][called.ordinal()]++; + sampleStats.incrValue(sample, truth, called); } } // otherwise, mark no-calls for all samples else { - Genotype.Type called = Genotype.Type.NO_CALL; for (String sample : validation.getSampleNames()) { - SampleStats stats = concordanceStats.get(sample); - if (stats == null) - continue; - Genotype.Type truth = validation.getGenotype(sample).getType(); - stats.concordance[truth.ordinal()][called.ordinal()]++; + sampleStats.incrValue(sample, truth, called); } } @@ -258,11 +194,78 @@ public class GenotypeConcordance extends VariantEvaluator { private static boolean isValidVC(VariantContext vc) { return (vc != null && !vc.isFiltered()); } +} - private void createConcordanceTable(VariantContext vc) { - concordanceStats = new HashMap(); - for (String sample : vc.getSampleNames()) - concordanceStats.put(sample, new SampleStats()); +/** + * a table of sample names to genotype concordance figures + */ +class SampleStats implements TableType { + private final int nGenotypeTypes; + + // sample to concordance stats object + private HashMap concordanceStats = new HashMap(); + + /** + * + * @return one row per sample + */ + public Object[] getRowKeys() { + return concordanceStats.keySet().toArray(new String[concordanceStats.size()]); } -} \ No newline at end of file + /** + * increment the specified value + * @param sample the sample name + * @param truth the truth type + * @param called the called type + */ + public void incrValue(String sample, Genotype.Type truth, Genotype.Type called) { + if (!concordanceStats.containsKey(sample)) + throw new StingException("Sample " + sample + " has not been seen in a previous eval; this analysis module assumes that all samples are present in each variant context"); + 
concordanceStats.get(sample)[truth.ordinal()][called.ordinal()]++; + } + + /** + * get the column keys + * @return a list of objects, in this case strings, that are the column names + */ + public Object[] getColumnKeys() { + return new String[]{"total_true_ref","n_ref/ref","%_ref/ref", + "n_ref/no-call","n_ref/het","n_ref/hom", + "total_true_het","n_het/het","%_het/het", + "n_het/no-call","n_het/ref","n_het/hom", + "total_true_hom","n_hom/hom","%_hom/hom", + "n_hom/no-call","n_hom/ref","n_hom/het"}; + } + + public SampleStats(VariantContext vc, int nGenotypeTypes) { + this.nGenotypeTypes = nGenotypeTypes; + for (String sample : vc.getSampleNames()) + concordanceStats.put(sample, new long[nGenotypeTypes][nGenotypeTypes]); + } + + public Object getCell(int x, int y) { + // we have three rows of 6 right now for output (rows: ref, het, hom) + Genotype.Type type = Genotype.Type.values()[(y/6)+1]; // get the row type + // save some repeat work, get the total every time + long total = 0; + for (int called = 0; called < nGenotypeTypes; called++) + total += concordanceStats.get((String) getRowKeys()[x])[type.ordinal()][called]; + + // now get the cell they're interested in + switch (y % 6) { + case (0): // get the total_true for this type + return total; + case (1): + return concordanceStats.get((String)getRowKeys()[x])[type.ordinal()][type.ordinal()]; + case (2): + return total == 0 ? 
0.0 : (100.0 * (double) concordanceStats.get((String)getRowKeys()[x])[type.ordinal()][type.ordinal()] / (double) total); + default: // columns 3-5 are the off-diagonal calls in Genotype.Type order, so step over the called == truth entry (assumes ordinals NO_CALL=0, HOM_REF=1, HET=2, HOM_VAR=3, as the (y/6)+1 row lookup does) + return concordanceStats.get((String)getRowKeys()[x])[type.ordinal()][((y % 6) - 3 < type.ordinal()) ? (y % 6) - 3 : (y % 6) - 2]; + } + } + + public String getName() { + return "Sample Statistics"; + } +} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/MendelianViolationEvaluator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/MendelianViolationEvaluator.java index 625a6d89e..6015e0a64 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/MendelianViolationEvaluator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/MendelianViolationEvaluator.java @@ -44,19 +44,19 @@ import java.util.regex.Matcher; @Analysis(name = "Mendelian Violation Evaluator", description = "Mendelian Violation Evaluator") public class MendelianViolationEvaluator extends VariantEvaluator { - @DataPoint(name = "number_variants", description = "Number of mendelian variants found") + @DataPoint(name = "variants", description = "Number of mendelian variants found") long nVariants; - @DataPoint(name = "number_violations", description = "Number of mendelian violations found") + @DataPoint(name = "violations", description = "Number of mendelian violations found") long nViolations; - @DataPoint(description = "number of child hom ref calls where the parent was hom variant") + @DataPoint(name="KHR->PHV",description = "number of child hom ref calls where the parent was hom variant") long KidHomRef_ParentHomVar; - @DataPoint(description = "number of child het calls where the parent was hom ref") + @DataPoint(name="KHET->PHR",description = "number of child het calls where the parent was hom ref") long KidHet_ParentsHomRef; - @DataPoint(description = "number of child het calls where the parent was hom variant") + @DataPoint(name="KHET->PHV",description = "number of child het calls where the parent was 
hom variant") long KidHet_ParentsHomVar; - @DataPoint(description = "number of child hom variant calls where the parent was hom ref") + @DataPoint(name="KHV->PHR",description = "number of child hom variant calls where the parent was hom ref") long KidHomVar_ParentHomRef; VariantEval2Walker parent; @@ -177,25 +177,4 @@ public class MendelianViolationEvaluator extends VariantEvaluator { return true; } - - public String toString() { - return getName() + ": " + summaryLine(); - } - - private String summaryLine() { - return String.format("%d %d %d %d %d %d", nVariants, nViolations, KidHomRef_ParentHomVar, KidHet_ParentsHomRef, KidHet_ParentsHomVar, KidHomVar_ParentHomRef); - } - - private static List HEADER = - Arrays.asList("nVariants", "nViolations", "KidHomRef_ParentHomVar", "KidHet_ParentsHomRef", "KidHet_ParentsHomVar", "KidHomVar_ParentHomRef"); - - // making it a table - - public List getTableHeader() { - return HEADER; - } - - public List> getTableRows() { - return Arrays.asList(Arrays.asList(summaryLine().split(" "))); - } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/TiTvVariantEvaluator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/TiTvVariantEvaluator.java index b6b59b5c4..794cea7ad 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/TiTvVariantEvaluator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/TiTvVariantEvaluator.java @@ -17,10 +17,14 @@ public class TiTvVariantEvaluator extends VariantEvaluator { long nTi = 0; @DataPoint(name = "tv_count", description = "number of transversion loci") long nTv = 0; - @DataPoint(name = "ti_count_std", description = "number of transition sites in the std") + @DataPoint(name = "ti/tv ratio", description = "the transition to transversion ratio") + double tiTvRatio = 0.0; + @DataPoint(name = "ti_count_std", description = "number of standard transition sites") long nTiInStd = 0; - 
@DataPoint(name = "tv_count_std", description = "number of transversion sites in the std") + @DataPoint(name = "tv_count_std", description = "number of standard transversion sites") long nTvInStd = 0; + @DataPoint(name = "ti/tv ratio standard", description = "the transition to transversion ratio") + double TiTvRatioStandard = 0.0; public TiTvVariantEvaluator(VariantEval2Walker parent) { // don't do anything @@ -43,7 +47,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator { if (vc.isTransition()) { if (updateStandard) nTiInStd++; else nTi++; - } else { + } else { if (updateStandard) nTvInStd++; else nTv++; } @@ -61,25 +65,10 @@ public class TiTvVariantEvaluator extends VariantEvaluator { return null; // we don't capture any intersting sites } - public String toString() { - return getName() + ": " + summaryLine(); - } - - private String summaryLine() { - return String.format("%d %d %.2f %d %d %.2f", - nTi, nTv, ratio(nTi, nTv), - nTiInStd, nTvInStd, ratio(nTiInStd, nTvInStd)); - } - - private static List HEADER = - Arrays.asList("nTi", "nTv", "TiTvRatio", "nTiStandard", "nTvStandard", "TiTvRatioStandard"); - - // making it a table - public List getTableHeader() { - return HEADER; - } - - public List> getTableRows() { - return Arrays.asList(Arrays.asList(summaryLine().split(" "))); + @Override + public void finalizeEvaluation() { + // the ti/tv ratio needs to be set (it's not calculated per-variant). 
+ this.tiTvRatio = rate(nTi,nTv); + this.TiTvRatioStandard = rate(nTiInStd,nTvInStd); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java index dc221ee8c..32368a992 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java @@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.playground.utils.report.tags.Analysis; +import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; import java.util.List; import java.util.Arrays; @@ -20,13 +21,38 @@ import java.util.Arrays; */ @Analysis(name = "Validation Rate", description = "Validation Rate") public class ValidationRate extends VariantEvaluator { + @DataPoint(name="# mono in comp",description = "Number of mono calls in the comparison ROD") + long n_mono_in_comp; + @DataPoint(name="# poly in comp",description = "Number of poly calls in the comparison ROD") + long n_poly_in_comp; + @DataPoint(name="% poly in comp",description = "Percent of poly calls in the comparison ROD") + double percent_poly_in_comp; + @DataPoint(name="# mono calls at mono sites",description = "Number of mono calls at mono sites") + long n_mono_calls_at_mono_sites; + @DataPoint(name="# poly calls at mono sites",description = "Number of poly calls at mono sites") + long n_poly_calls_at_mono_sites; + @DataPoint(name="# nocalls at mono sites",description = "Number of no calls at mono sites") + long n_nocalls_at_mono_sites; + @DataPoint(name="% mono sites called poly",description = "Percentage of mono sites called poly") + double 
percent_mono_sites_called_poly; + @DataPoint(name="# mono calls at poly sites",description = "Number of mono calls at poly sites") + long n_mono_calls_at_poly_sites; + @DataPoint(name="# poly calls at poly sites",description = "Number of poly calls at poly sites") + long n_poly_calls_at_poly_sites; + @DataPoint(name="# nocalls at poly sites",description = "Number of no calls at poly sites") + long n_nocalls_at_poly_sites; + @DataPoint(name="% poly sites called poly",description = "Percent of poly sites called poly") + double percent_poly_sites_called_poly; + @DataPoint(description = "The positive predictive value (PPV), as a percentage") + double PPV; + @DataPoint(description = "The sensitivity, as a percentage") + double sensitivity; // todo -- subset validation data by list of samples, if provided - - // todo -- print out PPV and sensitivity numbers - class SiteStats { - long nPoly = 0, nMono = 0, nNoCall = 0; + long nPoly = 0; + long nMono = 0; + long nNoCall = 0; double polyPercent() { return 100 * rate(nPoly, nPoly + nMono + nNoCall); @@ -49,42 +75,33 @@ public class ValidationRate extends VariantEvaluator { return 2; // we need to see each eval track and each comp track } - public String toString() { - return getName() + ": " + summaryLine(); - } - - private String summaryLine() { + @Override + public void finalizeEvaluation() { long TP = evalOverlapAtPoly.nPoly + evalOverlapAtMono.nMono; long FP = evalOverlapAtMono.nPoly + evalOverlapAtPoly.nMono; long FN = evalOverlapAtPoly.nMono + evalOverlapAtPoly.nNoCall; - return String.format("%d %d %.2f %d %d %d %.2f %d %d %d %.2f %.2f %.2f", - validationStats.nMono, validationStats.nPoly, validationStats.polyPercent(), - evalOverlapAtMono.nMono, evalOverlapAtMono.nPoly, evalOverlapAtMono.nNoCall, evalOverlapAtMono.polyPercent(), - evalOverlapAtPoly.nMono, evalOverlapAtPoly.nPoly, evalOverlapAtPoly.nNoCall, evalOverlapAtPoly.polyPercent(), - 100 * rate(TP, TP + FP), 100 * rate(TP, TP + FN)); - } + // fill in the output fields + n_mono_in_comp = validationStats.nMono; + 
n_poly_in_comp = validationStats.nPoly; + percent_poly_in_comp = validationStats.polyPercent(); + n_mono_calls_at_mono_sites = evalOverlapAtMono.nMono; + n_poly_calls_at_mono_sites = evalOverlapAtMono.nPoly; + n_nocalls_at_mono_sites = evalOverlapAtMono.nNoCall; + percent_mono_sites_called_poly = evalOverlapAtMono.polyPercent(); + n_mono_calls_at_poly_sites = evalOverlapAtPoly.nMono; + n_poly_calls_at_poly_sites = evalOverlapAtPoly.nPoly; // poly (not mono) calls at poly sites, matching the column order of the removed summaryLine() + n_nocalls_at_poly_sites = evalOverlapAtPoly.nNoCall; + percent_poly_sites_called_poly = evalOverlapAtPoly.polyPercent(); + PPV = 100 * rate(TP, TP + FP); + sensitivity = 100 * rate(TP, TP + FN); - private static List HEADER = - Arrays.asList("n_mono_in_comp", "n_poly_in_comp", "percent_poly_in_comp", - "n_mono_calls_at_mono_sites", "n_poly_calls_at_mono_sites", "n_nocalls_at_mono_sites", "percent_mono_sites_called_poly", - "n_mono_calls_at_poly_sites", "n_poly_calls_at_poly_sites", "n_nocalls_at_poly_sites", "percent_poly_sites_called_poly", - "PPV", "Sensitivity"); - - // making it a table - - public List getTableHeader() { - return HEADER; } public boolean enabled() { return true; } - public List> getTableRows() { - return Arrays.asList(Arrays.asList(summaryLine().split(" "))); - } - public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { String interesting = null; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java index 5c4681368..95495f170 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java @@ -8,6 +8,9 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrde import 
org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.playground.utils.report.ReportMarshaller; +import org.broadinstitute.sting.playground.utils.report.VE2ReportFactory; +import org.broadinstitute.sting.playground.utils.report.utils.Node; import org.broadinstitute.sting.utils.PackageUtils; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.Utils; @@ -121,6 +124,10 @@ public class VariantEval2Walker extends RodWalker { @Argument(shortName="maxRsIDBuild", fullName="maxRsIDBuild", doc="If provided, only variants with rsIDs <= maxRsIDBuild will be included in the set of known snps", required=false) protected int maxRsIDBuild = Integer.MAX_VALUE; + @Argument(shortName="reportType", fullName="reportType", doc="If provided, set the template type", required=false) + protected VE2ReportFactory.VE2TemplateType reportType = VE2ReportFactory.defaultReportFormat; + + Set rsIDsToExclude = null; // -------------------------------------------------------------------------------------------------------------- @@ -555,35 +562,49 @@ public class VariantEval2Walker extends RodWalker { } public void onTraversalDone(Integer result) { - // todo -- this really needs to be pretty printed; use some kind of table organization - // todo -- so that we can load up all of the data in one place, analyze the widths of the columns - // todo -- and pretty print it + ReportMarshaller marshaller = VE2ReportFactory.getTemplate(out,reportType,createExtraOutputTags()); for ( String evalName : variantEvaluationNames ) { - boolean first = true; - out.printf("%n%n"); - - // todo -- show that comp is dbsnp, etc. is columns - String lastEvalTrack = null; for ( EvaluationContext group : contexts ) { - if ( lastEvalTrack == null || ! 
lastEvalTrack.equals(group.evalTrackName) ) { - out.printf("%s%n", Utils.dupString('-', 80)); - lastEvalTrack = group.evalTrackName; - } - VariantEvaluator eval = getEvalByName(evalName, group.evaluations); - String keyWord = group.getDisplayName(); + // finalize the evaluation + eval.finalizeEvaluation(); - if ( eval.enabled() ) { - if ( first ) { - out.printf("%20s %s %s%n", evalName, formatKeyword(CONTEXT_HEADER), Utils.join("\t", eval.getTableHeader())); - first = false; - } - - for ( List row : eval.getTableRows() ) - out.printf("%20s %s %s%n", evalName, formatKeyword(keyWord), Utils.join("\t", row)); - } + if ( eval.enabled() ) + marshaller.write(createPrependNodeList(group),eval); } } + marshaller.close(); + } + + /** + * create some additional output lines about the analysis + * @return a list of nodes to attach to the report as tags + */ + private List createExtraOutputTags() { + List list = new ArrayList(); + list.add(new Node("reference file",getToolkit().getArguments().referenceFile.getName(),"The reference sequence file")); + for (String binding : getToolkit().getArguments().RODBindings) + list.add(new Node("ROD binding",binding,"A ROD binding passed to the analysis")); + return list; + } + + + /** + * given the evaluation context, create the list of pre-pended nodes for the output system. + * The nodes are returned in this order: evaluation_name, comparison_name, jexl_expression, filter_name, + * novelty_name + * @param group the evaluation context + * @return a list of Nodes to prepend the analysis module output with + */ + private List createPrependNodeList(EvaluationContext group) { + // add the branching nodes: jexl expression, comparison track, eval track etc + Node jexlNode = new Node("jexl_expression",(group.selectExp != null) ? 
group.selectExp.exp.toString() : "none","The jexl filtering expression"); + Node compNode = new Node("comparison_name",group.compTrackName,"The comparison track name"); + Node evalNode = new Node("evaluation_name",group.evalTrackName,"The evaluation name"); + Node filterNode = new Node("filter_name",group.filtered,"The filter name"); + Node noveltyNode = new Node("novelty_name",group.novelty,"The novelty name"); + // the ordering is important below, this is the order the columns will appear in any output format + return Arrays.asList(evalNode,compNode,jexlNode,filterNode,noveltyNode); } protected Logger getLogger() { return logger; } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEvaluator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEvaluator.java index 6727c59bf..242512f49 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEvaluator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEvaluator.java @@ -55,13 +55,10 @@ abstract class VariantEvaluator { return null; } - public void finalize() {} - - public abstract String toString(); - - // making it a table - public abstract List getTableHeader(); - public abstract List> getTableRows(); + /** + * override this method for any finalization of calculations after the analysis is completed + */ + public void finalizeEvaluation() {} // // useful common utility routines diff --git a/java/src/org/broadinstitute/sting/playground/utils/report/AnalysisModuleScanner.java b/java/src/org/broadinstitute/sting/playground/utils/report/AnalysisModuleScanner.java index e13f88ebf..4a0dde3cc 100644 --- a/java/src/org/broadinstitute/sting/playground/utils/report/AnalysisModuleScanner.java +++ b/java/src/org/broadinstitute/sting/playground/utils/report/AnalysisModuleScanner.java @@ -31,6 +31,7 @@ import org.broadinstitute.sting.utils.StingException; import 
java.lang.annotation.Annotation; import java.lang.reflect.Field; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.Map; @@ -45,9 +46,9 @@ import java.util.Map; public class AnalysisModuleScanner { // what we extracted from the class - private Map parameters = new HashMap(); // the parameter annotations - private Map datums = new HashMap(); // the data we've discovered - private Analysis analysis; // the analysis annotation + private Map parameters = new LinkedHashMap(); // the parameter annotations + private Map datums = new LinkedHashMap(); // the data we've discovered + private Analysis analysis; // the analysis annotation // private storage of the class type private final Class cls; diff --git a/java/src/org/broadinstitute/sting/playground/utils/report/ReportMarshaller.java b/java/src/org/broadinstitute/sting/playground/utils/report/ReportMarshaller.java index 37b1deb8e..4459fce57 100644 --- a/java/src/org/broadinstitute/sting/playground/utils/report/ReportMarshaller.java +++ b/java/src/org/broadinstitute/sting/playground/utils/report/ReportMarshaller.java @@ -27,12 +27,14 @@ import freemarker.template.Configuration; import freemarker.template.DefaultObjectWrapper; import freemarker.template.Template; import freemarker.template.TemplateException; +import org.broadinstitute.sting.oneoffprojects.walkers.varianteval2.CountVariants; import org.broadinstitute.sting.playground.utils.report.utils.ComplexDataUtils; import org.broadinstitute.sting.playground.utils.report.utils.Node; import org.broadinstitute.sting.utils.StingException; import java.io.*; import java.lang.reflect.Field; +import java.text.DateFormat; import java.util.*; @@ -48,7 +50,7 @@ public class ReportMarshaller { // the aggregation of all our analyses private Node root; - private File writeLocation; + private Writer writeLocation; /** * create a marshaled object @@ -57,26 +59,52 @@ public class ReportMarshaller { * @param template the template to use */ public 
ReportMarshaller(String reportName, File filename, Template template) { - init(reportName, filename); + try { + init(reportName, new OutputStreamWriter(new FileOutputStream(filename))); + } catch (FileNotFoundException e) { + throw new StingException("Unable to create Writer from file " + filename,e); + } temp = template; } - private void init(String reportName, File filename) { - root = new Node("report", reportName, "the overarching report object"); - root.addChild(new Node("title", reportName, "title of the report")); - this.writeLocation = filename; - } - /** * create a marshaled object * * @param reportName the report name */ - public ReportMarshaller(String reportName, File filename) { - init(reportName, filename); - temp = createTemplate(); + public ReportMarshaller(String reportName, Writer writer, Template template, List reportTags) { + init(reportName, writer); + temp = template; + for (Node n : reportTags) { + n.setTag(); + root.addChild(n); + } } + /** + * create a marshaled object + * + * @param reportName the report name + */ + public ReportMarshaller(String reportName, OutputStream writer, Template template, List reportTags) { + init(reportName, new PrintWriter(writer)); + temp = template; + for (Node n : reportTags) { + n.setTag(); + root.addChild(n); + } + } + + /** + * initialize the ReportMarshaller + * @param reportName the report name + * @param writer the output writer + */ + private void init(String reportName, Writer writer) { + root = new Node("report", reportName, DateFormat.getDateTimeInstance().format(new Date())); + root.addChild(new Node("title", reportName, "title of the report")); + this.writeLocation = writer; + } /** * add an analysis module to the output source @@ -102,30 +130,20 @@ public class ReportMarshaller { * * @param toMarshall the object to marshall */ - public void write(List prependNodes, Object toMarshall) { - // Create a context to add data to - HashMap analysisMap = new HashMap(); + public void write(List tags, 
Object toMarshall) { AnalysisModuleScanner moduleScanner = new AnalysisModuleScanner(toMarshall); - Node analysis = addAnalysis(moduleScanner); - analysis.addAllChildren(getParameterNodes(toMarshall, moduleScanner)); - analysis.addAllChildren(getDataPointNodes(toMarshall, moduleScanner)); - // prepend the list of nodes passed in - Node currChild = null; - for (Node n : prependNodes) { - if (currChild == null) { - root.addChild(n); - currChild = n; - } else { - currChild.addChild(n); - currChild = n; - } + Node currChild = analysis; + for (Node n : tags) { + n.setTag(); + currChild.addChild(n); } - // add this analysis to the root node - if (currChild == null) root.addChild(analysis); - else currChild.addChild(analysis); + + root.addChild(analysis); + currChild.addAllChildren(getDataPointNodes(toMarshall, moduleScanner)); + currChild.addAllChildren(getParameterNodes(toMarshall, moduleScanner)); } @@ -175,18 +193,12 @@ public class ReportMarshaller { * call the method to finalize the report */ public void close() { - Writer out = null; - try { - out = new OutputStreamWriter(new FileOutputStream(this.writeLocation)); - } catch (FileNotFoundException e) { - e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. - } try { // add the data to a map Map map = new HashMap(); map.put("root", root); - temp.process(map, out); - out.flush(); + temp.process(map, writeLocation); + writeLocation.flush(); } catch (TemplateException e) { e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. 
} catch (IOException e) { @@ -194,23 +206,6 @@ public class ReportMarshaller { } } - private Template createTemplate() { - Configuration cfg = new Configuration(); - try { - cfg.setDirectoryForTemplateLoading(new File("templates")); - } catch (IOException e) { - e.printStackTrace(); - } - cfg.setObjectWrapper(new DefaultObjectWrapper()); - Template temp = null; - try { - temp = cfg.getTemplate("myTestTemp.ftl"); // TODO: obviously this has to be changed to a factory or something like that - } catch (IOException e) { - e.printStackTrace(); - } - return temp; - } - /** * helper method for adding a Node to the specified node, given the field * @@ -221,7 +216,12 @@ public class ReportMarshaller { private static void addChildNodeFromField(Object toMarshall, Field f, Node node) { f.setAccessible(true); try { - node.addAllChildren(ComplexDataUtils.resolveObjects(f.get(toMarshall))); + Collection nodes = ComplexDataUtils.resolveObjects(f.get(toMarshall)); + // we want to eliminate any data nodes that are there just to incorporate an underlying table + if (nodes.size() == 1 && nodes.iterator().next().table==true) + node.clone(nodes.iterator().next()); + else + node.addAllChildren(nodes); } catch (IllegalAccessException e) { throw new StingException("Unable to access field " + f); } diff --git a/java/src/org/broadinstitute/sting/playground/utils/report/VE2ReportFactory.java b/java/src/org/broadinstitute/sting/playground/utils/report/VE2ReportFactory.java new file mode 100644 index 000000000..23b59e911 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/utils/report/VE2ReportFactory.java @@ -0,0 +1,89 @@ +package org.broadinstitute.sting.playground.utils.report; + +import freemarker.template.Configuration; +import freemarker.template.DefaultObjectWrapper; +import freemarker.template.Template; +import org.broadinstitute.sting.playground.utils.report.utils.Node; +import org.broadinstitute.sting.utils.StingException; + +import java.io.*; +import java.util.ArrayList; 
+import java.util.List; +import java.util.Map; + + +/** + * + * @author aaron + * + * Class VE2ReportFactory + * + * create ReportMarshaller from writers and template types + */ +public class VE2ReportFactory { + // where templates are stored + public static final String ve2templateDir = "templates/VE2"; + + // our default output type + public static final VE2TemplateType defaultReportFormat = VE2TemplateType.Table; + + /** the types of templates we're aware of for VariantEval2 */ + public enum VE2TemplateType { + Table("human_readable.ftl"), + Grep("grep_readable.ftl"), + CSV("csv_readable.ftl"); + + public String filename; + + VE2TemplateType(String file) { + filename = file; + } + } + + /** + * create a list of RM from an mapping of writer to template type + * @param fileset the mapping of files to types + * @param reportTags the tags to append to each report root node + * @return a list of ReportMarshallers to write data to + */ + public static List getTemplate(Map fileset, List reportTags) { + List list = new ArrayList(); + for (Writer writer : fileset.keySet()) + list.add(new ReportMarshaller("Variant Eval 2 Report",writer,createTemplate(fileset.get(writer)),reportTags)); + return list; + } + + /** + * create a report ReportMarshaller from a writer, type, and any report tags + * @param writer the output object + * @param type the VE2TemplateType type + * @param reportTags the tags to append to each report root node + * @return a list of ReportMarshallers to write data to + */ + public static ReportMarshaller getTemplate(OutputStream writer,VE2TemplateType type, List reportTags) { + return new ReportMarshaller("Variant Eval 2 Report",writer,createTemplate(type),reportTags); + } + + /** + * create a template from the TemplateType + * @param template the template type + * @return a Template object + */ + private static Template createTemplate(VE2TemplateType template) { + Configuration cfg = new Configuration(); + try { + cfg.setDirectoryForTemplateLoading(new 
File(ve2templateDir)); + } catch (IOException e) { + throw new StingException("Unable to find template directory " + ve2templateDir,e); + } + cfg.setObjectWrapper(new DefaultObjectWrapper()); + Template temp = null; + try { + temp = cfg.getTemplate(template.filename); + } catch (IOException e) { + throw new StingException("Unable to create template file " + template.filename + " of type " + template,e); + } + return temp; + } + +} diff --git a/java/src/org/broadinstitute/sting/playground/utils/report/utils/ComplexDataUtils.java b/java/src/org/broadinstitute/sting/playground/utils/report/utils/ComplexDataUtils.java index ec004f8a2..b4eb2fa45 100644 --- a/java/src/org/broadinstitute/sting/playground/utils/report/utils/ComplexDataUtils.java +++ b/java/src/org/broadinstitute/sting/playground/utils/report/utils/ComplexDataUtils.java @@ -23,51 +23,80 @@ public class ComplexDataUtils { public static Collection resolveObjects(Object obj) { // TODO: fix this, we need a way to get the name of the list from the data point Collection nodes = new ArrayList(); - // the simplest case - if (obj == null) - nodes.add(new Node("", "", "")); // throw new IllegalStateException("object is null"); + // the simplest case, the object is null + if (obj == null) nodes.add(new Node("", "", "")); + // capture objects of type TableTable else if (obj instanceof TableType) nodes.add(tableToNode((TableType) obj, ((TableType) obj).getName())); + + // try to handle maps else if (obj instanceof Map) { - for (Object key : ((Map) obj).keySet()) { - Node keyNode = new Node("key", key.toString(), "map key"); - nodes.add(keyNode); - keyNode.addAllChildren(resolveObjects(((Map) obj).get(key))); - } + extractMap(obj, nodes); + + // handle collections } else if (obj instanceof Collection) nodes.addAll(listToNode((Collection) obj, "collection")); + + // arrays else if (obj.getClass().isArray()) nodes.addAll(listToNode(Arrays.asList(obj), "array")); + + // else we have a simple object (at least try to handle 
it that way else - nodes.add(new Node(obj.getClass().getSimpleName(), obj.toString(), "value")); + nodes.add(extractPlainObjectOrPrimitive(obj.getClass().getSimpleName(),obj)); + + // return the collection of nodes we've parsed out return nodes; } + /** + * extract a map object + * @param obj the object (instance of Map) + * @param nodes the node list to add our key->values to + */ + private static void extractMap(Object obj, Collection nodes) { + for (Object key : ((Map) obj).keySet()) { + Node keyNode = new Node("key", key.toString(), "map key"); + nodes.add(keyNode); + keyNode.addAllChildren(resolveObjects(((Map) obj).get(key))); + } + // special case: if the map is empty, add a null node + if (nodes.isEmpty()) nodes.add(new Node("", "", "")); + } + + /** + * extract a (hopefully) primitive value + * @param obj the object + */ + private static Node extractPlainObjectOrPrimitive(String name, Object obj) { + String value = ""; + if (obj instanceof Float || obj instanceof Double) + value = String.format("%.4f",(Double)obj); + else + value = obj.toString(); + return new Node(name, value, "value"); + } /** * given a TableType object, make it into a tree using maps. 
* * @param table the table type to convert into a map - * @return + * @return a node representing this table */ private static Node tableToNode(TableType table, String name) { Node root = new Node("table", name, "Table"); + root.setTable(); Object[] rows = table.getRowKeys(); Object[] cols = table.getColumnKeys(); // add the columns names - Node col_names = new Node("col_names", "col_names", "the column names, ~ seperated", false); - for (Object col : cols) - col_names.addChild(new Node("col", col.toString(), "a column name", false)); - root.addChild(col_names); - for (int x = 0; x < table.getRowKeys().length; x++) { - Node row = new Node("row", rows[x].toString(), "a row"); + Node row = new Node("row", rows[x].toString(), "a row in a table"); root.addChild(row); for (int y = 0; y < table.getColumnKeys().length; y++) { - Node col = new Node("column", cols[y].toString(), "a column"); + Node col = new Node("column", cols[y].toString(), "columns in a table"); row.addChild(col); - col.addChild(new Node("cell(" + x + "," + y + ")", table.getCell(x, y), "a cell")); + col.addChild(extractPlainObjectOrPrimitive("cell(" + x + "," + y + ")", table.getCell(x, y))); } } return root; diff --git a/java/src/org/broadinstitute/sting/playground/utils/report/utils/Node.java b/java/src/org/broadinstitute/sting/playground/utils/report/utils/Node.java index 4560c5c76..db0d0f2cf 100644 --- a/java/src/org/broadinstitute/sting/playground/utils/report/utils/Node.java +++ b/java/src/org/broadinstitute/sting/playground/utils/report/utils/Node.java @@ -1,16 +1,19 @@ package org.broadinstitute.sting.playground.utils.report.utils; -import java.util.Collection; -import java.util.LinkedHashSet; +import org.broadinstitute.sting.utils.StingException; + +import java.util.*; /** * a node, extracted using the new report output system. 
*/ public class Node { - public final String name; - public final String value; - public final String description; - public final boolean display; // is this node an output node, or a node for tracking internal data? true if output node + public String name; + public String value; + public String description; + public boolean display; // is this node an output node, or a node for tracking internal data? true if output node + public boolean table; // this is a hack, but I needed a way to indicate that a node was a row root node + public boolean tag; public Collection children; public Node(String name, String value, String description) { @@ -18,6 +21,8 @@ public class Node { this.name = name; this.description = description; display = true; + table = false; + tag = false; } public Node(String name, String value, String description, boolean display) { @@ -25,8 +30,12 @@ public class Node { this.name = name; this.description = description; this.display = display; + table = false; + tag = false; } + public void setTable() {table = true;} + public void addChild(Node child) { if (children == null) children = new LinkedHashSet(); children.add(child); @@ -63,10 +72,42 @@ public class Node { } public Collection getChildren() { - return children; + return (children == null) ? 
new ArrayList() : children; } public boolean getDisplay() { return display; } + + public boolean getTable() { + return table; + } + + public boolean getTag() { + return tag; + } + + public void setTag() { + this.tag = true; + } + + public void clone(Node n) { + this.name = n.name; + this.value = n.value; + this.description = n.description; + this.display = n.display; + this.table = n.table; + this.tag = n.tag; + this.children = new LinkedHashSet(); + if (n.children != null) this.children.addAll(n.getChildren()); + } + + public List> getTableRows() { + List> ret = NodeUtils.flattenToRow(this); + return ret; + } + + public int getRowCount() { + return NodeUtils.flattenToRowCount(this); + } } diff --git a/java/src/org/broadinstitute/sting/playground/utils/report/utils/NodeUtils.java b/java/src/org/broadinstitute/sting/playground/utils/report/utils/NodeUtils.java new file mode 100644 index 000000000..228b8a239 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/utils/report/utils/NodeUtils.java @@ -0,0 +1,99 @@ +package org.broadinstitute.sting.playground.utils.report.utils; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + + +/** + * + * @author aaron + * + * Class NodeUtils + * + * utilities for working with nodes + */ +public class NodeUtils { + + static class NodeMarker { + private Node node; + + public NodeMarker(Node n) { + node = n; + } + + public int rowCount() { + int sum = (node.table) ? 
node.getChildren().size() : 1; + for (Node n : node.getChildren()) { + NodeMarker fn = new NodeMarker(n); + sum = sum * fn.rowCount(); + } + return sum; + } + + private boolean validLeafNode() { + return node.getChildren().size() == 0 && node.display && !node.tag; + } + + private List> addToEachList(List> list) { + for (List lt : list) + lt.add(node); + return list; + } + + public List> toRow(List> oldList) { + // if we're a leaf node that isn't a tag, add it to each list + if (validLeafNode()) + addToEachList(oldList); + + // special case: if we've just got a single node, traverse into it + else if (node.getChildren().size() > 0 && !node.table) + for (Node n : node.children) { + oldList = new NodeMarker(n).toRow(oldList); + } + // when we encounter a table we want to branch into multiple rows + else if (node.table) { + List> newList = new ArrayList>(); + for (Node child : node.children) { + if (child.display && !child.tag) { + List> tempList = new ArrayList>(); + tempList.add(new ArrayList()); + tempList.get(0).add(child); + NodeMarker marker = new NodeMarker(child); + List> carry = marker.toRow(tempList); + newList.addAll(carry); + } + } + List> ret = new ArrayList>(); + // permutations of each previous list and the new temp list + for (List original : oldList) + for (List lst : newList) { + List temp = new ArrayList(); + temp.addAll(original); + temp.addAll(lst); + ret.add(temp); + } + return ret; + } + // be default return the old list + return oldList; + } + + } + + + // given a node, get the number of rows it will generate + public static int flattenToRowCount(Node n) { + NodeMarker fn = new NodeMarker(n); + return fn.rowCount(); + } + + // given a node, generate rows (flattening tables) + public static List> flattenToRow(Node n) { + NodeMarker fn = new NodeMarker(n); + List> nodesList = new ArrayList>(); + nodesList.add(new ArrayList()); + return fn.toRow(nodesList); + } +} diff --git 
a/java/src/org/broadinstitute/sting/playground/utils/report/utils/TableType.java b/java/src/org/broadinstitute/sting/playground/utils/report/utils/TableType.java index 1f9a5a5e9..3ae51bdcc 100644 --- a/java/src/org/broadinstitute/sting/playground/utils/report/utils/TableType.java +++ b/java/src/org/broadinstitute/sting/playground/utils/report/utils/TableType.java @@ -12,6 +12,6 @@ package org.broadinstitute.sting.playground.utils.report.utils; public interface TableType { public Object[] getRowKeys(); public Object[] getColumnKeys(); - public String getCell(int x, int y); + public Object getCell(int x, int y); public String getName(); } diff --git a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java index 5f3034f09..5c33328ae 100755 --- a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java +++ b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java @@ -6,12 +6,10 @@ import org.junit.Test; import java.util.HashMap; import java.util.Map; import java.util.Arrays; -import java.util.List; -import java.io.File; public class VariantEval2IntegrationTest extends WalkerTest { private static String cmdRoot = "-T VariantEval2" + - " -R " + oneKGLocation + "reference/human_b36_both.fasta"; + " -R " + oneKGLocation + "reference/human_b36_both.fasta -reportType Grep"; private static String root = cmdRoot + " -D " + GATKDataLocation + "dbsnp_129_b36.rod" + @@ -20,8 +18,8 @@ public class VariantEval2IntegrationTest extends WalkerTest { @Test public void testVE2Simple() { HashMap expectations = new HashMap(); - expectations.put("-L 1:1-10,000,000", "78e4cd917ddef84cdb3c6e95299483b4"); - expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "a15be04ebe52fd0775695b016e016975"); + expectations.put("-L 
1:1-10,000,000", "ae10d06b5d30ee227c0e1e18661a18f5"); + expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "a9a682734dbb9a574383a6e7ab385dcc"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs = entry.getKey(); @@ -41,10 +39,10 @@ public class VariantEval2IntegrationTest extends WalkerTest { " -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" + " -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf"; - String eqMD5s = "d21d1dcbebcfade3d056af5f907e6f56"; // next two examples should be the same! + String eqMD5s = "0fc336da6b77cd7bf1b9c2568a57e94a"; // next two examples should be the same! expectations.put("", eqMD5s); expectations.put(" -known comp_hapmap -known dbsnp", eqMD5s); - expectations.put(" -known comp_hapmap", "573a50b1f4ae338ef6937720a7ca5f34"); + expectations.put(" -known comp_hapmap", "b6072559ce2d01309639ebb2f6133fa9"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs2 = entry.getKey(); @@ -62,7 +60,7 @@ public class VariantEval2IntegrationTest extends WalkerTest { String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30"; WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s", 2, - Arrays.asList("00a38a8e7da6c1880c49749563eb0dcf", "a3ce1d70d8ae3874807e9d61994d42af")); + Arrays.asList("b42a8974ebb4354bf172ae5f81922f9e", "a3ce1d70d8ae3874807e9d61994d42af")); executeTest("testVE2WriteVCF", spec); } } diff --git a/templates/VE2/csv_readable.ftl b/templates/VE2/csv_readable.ftl new file mode 100644 index 000000000..6833d17f1 --- /dev/null +++ b/templates/VE2/csv_readable.ftl @@ -0,0 +1,82 @@ +<#-- a couple of basic fields we use--> +<#assign colWidth=20> +<#assign colTextWidth=17> +<#assign currentAnalysis=""> + +Report,${root.value} +Description,${root.description} + +<#list root.children as analysis> + <#if analysis.complex> + <#if analysis.value!=currentAnalysis> + <#assign 
currentAnalysis=analysis.value> + +Analysis,${analysis.value} + + <@emit_tags analysis=analysis/> + <@emit_column_names analysis=analysis/> + + + <#else> + <@emit_row_values analysis=analysis/> + + + +<#-- -------------------- --> +<#-- get the data tag values --> +<#macro emit_row_values analysis> + <#list analysis.tableRows as rows> + <@emit_tag_values analysis=analysis/> + <#list rows as node> + <#if (node.value?length > colTextWidth)> + <#lt>${node.value},<#rt> + <#else> + <#lt>${node.value},<#rt> + + + + + +<#-- -------------------- --> +<#-- get the column names --> +<#macro emit_column_names analysis> + <#if analysis.complex && analysis.display> + <#list analysis.children as child> + <#if child.complex && !child.table> + <@emit_name value=child.value/> + <#elseif child.table> + <#list child.children as rows> + <@emit_name value=rows.name/> + <#list rows.children as cols> + <@emit_name value=cols.value/> + + <#break> + + + + + +<#-- -------------------- --> +<#-- get the tag values --> +<#macro emit_tag_values analysis> + <#list analysis.children as child> + <#if child.tag> + <@emit_name value=child.value/> + + + +<#-- -------------------- --> +<#-- get the tag names --> +<#macro emit_tags analysis> + <#list analysis.children as child> + <#if child.tag> + <@emit_name value=child.name/> + + + + +<#-- -------------------- --> +<#-- a macro for cleaning up emitted names --> +<#macro emit_name value> + <#lt>${value},<#rt> + diff --git a/templates/VE2/grep_readable.ftl b/templates/VE2/grep_readable.ftl new file mode 100644 index 000000000..3db1288dc --- /dev/null +++ b/templates/VE2/grep_readable.ftl @@ -0,0 +1,59 @@ +<#list root.children as analysis> + <#if analysis.display && !analysis.tag> + <@recurse_macro node=analysis prefix=get_tags(analysis)/> + + +<#-- ------------------- --> +<#-- Table display macro --> +<#macro displayTable table> +${table.name} + <#list table.rows as row> + <#compress> + <#list row as item> + ${item}, + + + + +<#function get_tags 
rootnode> + <#assign ret=""> + <#list rootnode.children as child> + <#if child.tag> + <#if ret==""> + <#assign ret="[${child.name}=${child.value}]"> + <#else> + <#assign ret="${ret}.[${child.name}=${child.value}]"> + + + + <#return ret> + +<#-- -------------------- --> +<#-- recursively get data --> +<#macro recurse_macro node prefix> + <#if node.display> <#-- we don't display it if the value isn't set --> + <#compress> + <#if node.complex> + <#list node.children as child> + <#if prefix==""> + <#assign newPrefix="[${node.name}=${node.value}]"> + <#else> + <#assign newPrefix="${prefix}.[${node.name}=${node.value}]"> + + <@recurse_macro node=child prefix=newPrefix/> + + <#elseif node.display && !node.tag> + ${prefix} ${node.value} + <#assign prefix=""> + + + + +<#-- ------------------------------------- --> +<#-- display a list of single values macro --> +<#macro displaySimple listof> + <#list listof as point> +${point.name?right_pad(20)} <@recurse_macro node=point/> # ${point.description} + + +<#-- ------------------------------------- --> diff --git a/templates/VE2/human_readable.ftl b/templates/VE2/human_readable.ftl new file mode 100644 index 000000000..ef715f6b4 --- /dev/null +++ b/templates/VE2/human_readable.ftl @@ -0,0 +1,114 @@ +<#-- a couple of basic fields we use--> +<#assign colWidth=20> +<#assign colTextWidth=17> +<#assign currentAnalysis=""> + +Report: ${root.value} +Description: ${root.description} + +<#-- get any tags on the root node --> +<#list root.children as tagNode> + <#if tagNode.tag> +Tag: ${tagNode.name} = ${tagNode.value} + + + +<#list root.children as analysis> + <#if analysis.complex> + <#if analysis.value!=currentAnalysis> + <#assign currentAnalysis=analysis.value> + +Analysis: ${analysis.value} + +Column names specific to the analysis: + <@emit_column_names_with_descriptions analysis=analysis/> + + <@emit_tags analysis=analysis/> + <@emit_column_names analysis=analysis/> + 
+---------------------------------------------------------------------------------------------------------------------------------------- + + <@emit_row_values analysis=analysis/> + + +<#-- -------------------- --> +<#-- get the data tag values --> +<#macro emit_row_values analysis> + <#list analysis.tableRows as rows> + <@emit_tag_values analysis=analysis/> + <#list rows as node> + <#if (node.value?length > colTextWidth)> + <#lt>${(node.value?substring(0, colTextWidth)+"..")?right_pad(colWidth)}<#rt> + <#else> + <#lt>${node.value?right_pad(colWidth)}<#rt> + + + + + +<#-- -------------------- --> +<#-- get the column names --> +<#macro emit_column_names analysis> + <#if analysis.complex && analysis.display> + <#list analysis.children as child> + <#if child.complex && !child.table> + <@emit_name value=child.value/> + <#elseif child.table> + <#list child.children as rows> + <@emit_name value=rows.name/> + <#list rows.children as cols> + <@emit_name value=cols.value/> + + <#break> + + + + + +<#-- -------------------- --> +<#-- get the column names --> +<#macro emit_column_names_with_descriptions analysis> + <#if analysis.complex && analysis.display> + <#list analysis.children as child> + <#if child.complex && !child.table> + ${child.value?right_pad(40)}${child.description} + <#elseif child.table> + <#list child.children as rows> + ${rows.name?right_pad(40)}${rows.description} + <#list rows.children as cols> + ${cols.value?right_pad(40)}${cols.description} + + <#break> + + + + + +<#-- -------------------- --> +<#-- get the tag values --> +<#macro emit_tag_values analysis> + <#list analysis.children as child> + <#if child.tag> + <@emit_name value=child.value/> + + + +<#-- -------------------- --> +<#-- get the tag names --> +<#macro emit_tags analysis> + <#list analysis.children as child> + <#if child.tag> + <@emit_name value=child.name/> + + + + +<#-- -------------------- --> +<#-- a macro for cleaning up emitted names --> +<#macro emit_name value> + <#if (value?length 
> colTextWidth)> + <#lt>${(value?substring(0, colTextWidth)+"..")?right_pad(colWidth)}<#rt> + <#else> + <#lt>${value?right_pad(colWidth)}<#rt> + +