From 60dfba997bb0843b70c70d24dbbe2edd310587a6 Mon Sep 17 00:00:00 2001 From: aaron Date: Wed, 24 Mar 2010 05:40:10 +0000 Subject: [PATCH] added some sample annotations to VariantEval2 analysis modules, and some changes to the report system. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3067 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/varianteval2/CountVariants.java | 74 ++++++++--- .../walkers/varianteval2/DbSNPPercentage.java | 73 +++++++---- .../varianteval2/GenotypeConcordance.java | 120 ++++++++++++------ .../MendelianViolationEvaluator.java | 72 +++++++---- .../varianteval2/TiTvVariantEvaluator.java | 34 +++-- .../walkers/varianteval2/ValidationRate.java | 26 ++-- .../utils/report/ReportMarshaller.java | 73 ++++++++--- .../utils/report/TableFormatter.java | 99 --------------- .../utils/report/utils/ComplexDataUtils.java | 55 ++++---- .../playground/utils/report/utils/Node.java | 39 ++++-- 10 files changed, 391 insertions(+), 274 deletions(-) delete mode 100644 java/src/org/broadinstitute/sting/playground/utils/report/TableFormatter.java diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/CountVariants.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/CountVariants.java index 9503e04e1..b70ff8ac6 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/CountVariants.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/CountVariants.java @@ -5,39 +5,61 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; +import org.broadinstitute.sting.playground.utils.report.tags.Analysis; +import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; import org.broadinstitute.sting.utils.StingException; import java.util.List; import java.util.Arrays; +@Analysis(name = "Count Variants", description = "Counts different classes of variants in the sample") public class CountVariants extends VariantEvaluator { + @DataPoint(description = "Number of processed loci") long nProcessedLoci = 0; + @DataPoint(description = "Number of called loci") long nCalledLoci = 0; + @DataPoint(description = "Number of variant loci") long nVariantLoci = 0; + @DataPoint(description = "Number of reference loci") long nRefLoci = 0; + @DataPoint(description = "Number of snp loci") long nSNPs = 0; + @DataPoint(description = "Number of insertions") long nInsertions = 0; + @DataPoint(description = "Number of deletions") long nDeletions = 0; + @DataPoint(description = "Number of complex loci") long nComplex = 0; + @DataPoint(description = "Number of no calls loci") long nNoCalls = 0; + @DataPoint(description = "Number of het loci") long nHets = 0; + @DataPoint(description = "Number of hom ref loci") long nHomRef = 0; + @DataPoint(description = "Number of hom var loci") long nHomVar = 0; public CountVariants(VariantEval2Walker parent) { // don't do anything } - private double perLocusRate(long n) { return rate(n, nProcessedLoci); } - private long perLocusRInverseRate(long n) { return inverseRate(n, nProcessedLoci); } + private double perLocusRate(long n) { + return rate(n, nProcessedLoci); + } + + private long perLocusRInverseRate(long n) { + return inverseRate(n, nProcessedLoci); + } public String getName() { return "counter"; } - public boolean enabled() { return true; } + public boolean enabled() { + return true; + } public int getComparisonOrder() { return 1; // we only need to see each eval track @@ -51,27 +73,43 @@ public class CountVariants extends VariantEvaluator { //nProcessedLoci++; nCalledLoci++; - if ( vc1.isVariant() ) nVariantLoci++; - switch ( vc1.getType() ) { - case NO_VARIATION: nRefLoci++; break; - case SNP: nSNPs++; break; - case INDEL: - if ( vc1.isInsertion() ) nInsertions++; else nDeletions++; + if (vc1.isVariant()) nVariantLoci++; + switch (vc1.getType()) { + case NO_VARIATION: + nRefLoci++; + break; + case SNP: + nSNPs++; + break; + case INDEL: + if (vc1.isInsertion()) nInsertions++; + else nDeletions++; + break; + case MIXED: + nComplex++; break; - case MIXED: nComplex++; break; } - for ( Genotype g : vc1.getGenotypes().values() ) { - switch ( g.getType() ) { - case NO_CALL: nNoCalls++; break; - case HOM_REF: nHomRef++; break; - case HET: nHets++; break; - case HOM_VAR: nHomVar++; break; - default: throw new StingException("BUG: Unexpected genotype type: " + g); + for (Genotype g : vc1.getGenotypes().values()) { + switch (g.getType()) { + case NO_CALL: + nNoCalls++; + break; + case HOM_REF: + nHomRef++; + break; + case HET: + nHets++; + break; + case HOM_VAR: + nHomVar++; + break; + default: + throw new StingException("BUG: Unexpected genotype type: " + g); } } - return null; // we don't capture any intersting sites + return null; // we don't capture any interesting sites } public String toString() { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/DbSNPPercentage.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/DbSNPPercentage.java index 79a0a12e3..1a7f2692d 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/DbSNPPercentage.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/DbSNPPercentage.java @@ -5,6 +5,8 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; +import org.broadinstitute.sting.playground.utils.report.tags.Analysis; +import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; import java.util.List; import java.util.Arrays; @@ -18,11 +20,16 @@ import java.util.Arrays; * This software is supplied without any warranty or guaranteed support whatsoever. Neither * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. */ +@Analysis(name = "dbOverlap", description = "the overlap between DbSNP sites and other SNP tracks") public class DbSNPPercentage extends VariantEvaluator { - private long nDBSNPs = 0; - private long nEvalSNPs = 0; + @DataPoint(name = "DbSNP_count", description = "number of DPSNP sites") + private long nDBSNPs = 0; + @DataPoint(name = "total_count", description = "number of total snp sites") + private long nEvalSNPs = 0; + @DataPoint(name = "number_snps_at_dbsnp", description = "number of SNP sites at DPSNP sites") private long nSNPsAtdbSNPs = 0; - private long nConcordant = 0; + @DataPoint(name = "number_concordant", description = "number of concordant sites") + private long nConcordant = 0; public DbSNPPercentage(VariantEval2Walker parent) { // don't do anything @@ -36,11 +43,25 @@ public class DbSNPPercentage extends VariantEvaluator { return 2; // we need to see each eval track and each comp track } - public long nDBSNPs() { return nDBSNPs; } - public long nEvalSNPs() { return nEvalSNPs; } - public long nSNPsAtdbSNPs() { return nSNPsAtdbSNPs; } - public long nConcordant() { return nConcordant; } - public long nNovelSites() { return Math.abs(nEvalSNPs() - nSNPsAtdbSNPs()); } + public long nDBSNPs() { + return nDBSNPs; + } + + public long nEvalSNPs() { + return nEvalSNPs; + } + + public long nSNPsAtdbSNPs() { + return nSNPsAtdbSNPs; + } + + public long nConcordant() { + return nConcordant; + } + + public long nNovelSites() { + return Math.abs(nEvalSNPs() - nSNPsAtdbSNPs()); + } /** @@ -48,9 +69,14 @@ public class DbSNPPercentage extends VariantEvaluator { * * @return db rate */ - public double dbSNPRate() { return rate(nSNPsAtdbSNPs(), nEvalSNPs()); } - public double concordanceRate() { return rate(nConcordant(), nSNPsAtdbSNPs()); } - + public double dbSNPRate() { + return rate(nSNPsAtdbSNPs(), nEvalSNPs()); + } + + public double concordanceRate() { + return rate(nConcordant(), nSNPsAtdbSNPs()); + } + public String toString() { return getName() + ": " + summaryLine(); } @@ -65,12 +91,15 @@ public class DbSNPPercentage extends VariantEvaluator { "n_novel_snps", "percent_eval_in_comp", "concordance_rate"); // making it a table + public List getTableHeader() { return HEADER; } - public boolean enabled() { return true; } - + public boolean enabled() { + return true; + } + public List> getTableRows() { return Arrays.asList(Arrays.asList(summaryLine().split(" "))); } @@ -78,13 +107,13 @@ public class DbSNPPercentage extends VariantEvaluator { /** * Returns true if every allele in eval is also in dbsnp * - * @param eval eval context - * @param dbsnp db context + * @param eval eval context + * @param dbsnp db context * @return true if eval and db are discordant */ - public boolean discordantP(VariantContext eval, VariantContext dbsnp ) { - for ( Allele a : eval.getAlleles() ) { - if ( ! dbsnp.hasAllele(a, true) ) + public boolean discordantP(VariantContext eval, VariantContext dbsnp) { + for (Allele a : eval.getAlleles()) { + if (!dbsnp.hasAllele(a, true)) return true; } @@ -95,13 +124,13 @@ public class DbSNPPercentage extends VariantEvaluator { boolean dbSNPIsGood = dbsnp != null && dbsnp.isSNP() && dbsnp.isNotFiltered(); boolean evalIsGood = eval != null && eval.isSNP(); - if ( dbSNPIsGood ) nDBSNPs++; // count the number of dbSNP events - if ( evalIsGood ) nEvalSNPs++; // count the number of dbSNP events + if (dbSNPIsGood) nDBSNPs++; // count the number of dbSNP events + if (evalIsGood) nEvalSNPs++; // count the number of dbSNP events - if ( dbSNPIsGood && evalIsGood ) { + if (dbSNPIsGood && evalIsGood) { nSNPsAtdbSNPs++; - if ( ! discordantP(eval, dbsnp) ) // count whether we're concordant or not with the dbSNP value + if (!discordantP(eval, dbsnp)) // count whether we're concordant or not with the dbSNP value nConcordant++; } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/GenotypeConcordance.java index a5717dd33..9c0208021 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/GenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/GenotypeConcordance.java @@ -3,6 +3,10 @@ package org.broadinstitute.sting.oneoffprojects.walkers.varianteval2; import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.contexts.variantcontext.*; +import org.broadinstitute.sting.playground.utils.report.tags.Analysis; +import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; +import org.broadinstitute.sting.playground.utils.report.tags.Param; +import org.broadinstitute.sting.playground.utils.report.utils.TableType; import org.broadinstitute.sting.utils.StingException; import org.apache.log4j.Logger; @@ -17,29 +21,56 @@ import java.util.*; * This software is supplied without any warranty or guaranteed support whatsoever. Neither * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. */ +@Analysis(name = "Genotype Concordance", description = "Determine the genotype concordance between the genotypes in difference tracks") public class GenotypeConcordance extends VariantEvaluator { protected static Logger logger = Logger.getLogger(GenotypeConcordance.class); + // a mapping from allele count to stats + @DataPoint(description = "the frequency statistics for each allele") + private HashMap alleleCountStats = new HashMap(); + + // a mapping from sample to stats + @DataPoint(description = "the concordance statistics for each sample") + private HashMap concordanceStats = null; + private static final int MAX_MISSED_VALIDATION_DATA = 10000; private static final int nGenotypeTypes = Genotype.Type.values().length; - class SampleStats { + class SampleStats implements TableType { long[][] concordance = new long[nGenotypeTypes][nGenotypeTypes]; + // TableType methods + + public Object[] getRowKeys() { + return Genotype.Type.values(); + } + + public Object[] getColumnKeys() { + return Genotype.Type.values(); + } + + public String getCell(int x, int y) { + return String.valueOf(concordance[x][y]); + } + + public String getName() { + return "SampleStats"; + } + public String toString() { StringBuffer sb = new StringBuffer(); - for (int truth = 0; truth < nGenotypeTypes; truth++ ) { + for (int truth = 0; truth < nGenotypeTypes; truth++) { // don't print out when truth = no-call - if ( truth == Genotype.Type.NO_CALL.ordinal() ) + if (truth == Genotype.Type.NO_CALL.ordinal()) continue; long total = 0; - for (int called = 0; called < nGenotypeTypes; called++ ) + for (int called = 0; called < nGenotypeTypes; called++) total += concordance[truth][called]; - sb.append(String.format("%d %d %.2f ", total, concordance[truth][truth], total == 0 ? 0.0 : (100.0 * (double)concordance[truth][truth] / (double)total))); - for (int called = 0; called < nGenotypeTypes; called++ ) { - if ( called != truth ) + sb.append(String.format("%d %d %.2f ", total, concordance[truth][truth], total == 0 ? 0.0 : (100.0 * (double) concordance[truth][truth] / (double) total))); + for (int called = 0; called < nGenotypeTypes; called++) { + if (called != truth) sb.append(String.format("%d ", concordance[truth][called])); } } @@ -48,22 +79,33 @@ public class GenotypeConcordance extends VariantEvaluator { } } - class FrequencyStats { + class FrequencyStats implements TableType { long nFound = 0; long nMissed = 0; + public Object[] getRowKeys() { + return new String[]{"sample"}; + } + + public Object[] getColumnKeys() { + return new String[]{"number_found", "number_missing"}; + } + + public String getName() { + return "FrequencyStats"; + } + + public String getCell(int x, int y) { + if (y == 0) return String.valueOf(nFound); + else return String.valueOf(nMissed); + } + public String toString() { long total = nFound + nMissed; return String.format("%d %d %.2f ", nFound, nMissed, total == 0 ? 0.0 : (100.0 * rate(nFound, total))); } } - // a mapping from allele count to stats - private HashMap alleleCountStats = new HashMap(); - - // a mapping from sample to stats - private HashMap concordanceStats = null; - // keep a list of the validation data we saw before the first eval data private HashSet missedValidationData = new HashSet(); @@ -80,7 +122,9 @@ public class GenotypeConcordance extends VariantEvaluator { return 2; // we need to see each eval track and each comp track } - public boolean enabled() { return true; } + public boolean enabled() { + return true; + } public String toString() { return getName() + ": " + getTableRows(); @@ -99,6 +143,7 @@ public class GenotypeConcordance extends VariantEvaluator { Arrays.asList("alleleCount", "n_found", "n_missed", "%_found"); // making it a table + public List getTableHeader() { ArrayList header = new ArrayList(); header.addAll(SAMPLE_HEADER); @@ -109,13 +154,13 @@ public class GenotypeConcordance extends VariantEvaluator { public List> getTableRows() { ArrayList> rows = new ArrayList>(); - if ( concordanceStats != null ) { - for ( Map.Entry sample : concordanceStats.entrySet() ) + if (concordanceStats != null) { + for (Map.Entry sample : concordanceStats.entrySet()) rows.add(Arrays.asList(String.format("%s %s", sample.getKey(), sample.getValue().toString()).split(" "))); } - if ( alleleCountStats != null ) { - for ( Map.Entry alleleCount : alleleCountStats.entrySet() ) + if (alleleCountStats != null) { + for (Map.Entry alleleCount : alleleCountStats.entrySet()) rows.add(Arrays.asList(String.format("%d %s", alleleCount.getKey(), alleleCount.getValue().toString()).split(" "))); } @@ -123,26 +168,27 @@ public class GenotypeConcordance extends VariantEvaluator { } private boolean warnedAboutValidationData = false; + public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { String interesting = null; // sanity check that we at least have either eval or validation data - if ( eval == null && !isValidVC(validation) ) + if (eval == null && !isValidVC(validation)) return interesting; - if ( concordanceStats == null ) { - if ( eval != null ) { + if (concordanceStats == null) { + if (eval != null) { // initialize the concordance table createConcordanceTable(eval); - for ( VariantContext vc : missedValidationData ) + for (VariantContext vc : missedValidationData) determineStats(null, vc); missedValidationData = null; } else { // todo -- Eric, this results in a memory problem when eval is WEx data but you are using CG calls genome-wide // todo -- perhaps you need should extend the evaluators with an initialize // todo -- method that gets the header (or samples) for the first eval sites? - if ( missedValidationData.size() > MAX_MISSED_VALIDATION_DATA) { - if ( ! warnedAboutValidationData ) { + if (missedValidationData.size() > MAX_MISSED_VALIDATION_DATA) { + if (!warnedAboutValidationData) { logger.warn("Too many genotype sites missed before eval site appeared; ignoring"); warnedAboutValidationData = true; } @@ -154,26 +200,26 @@ public class GenotypeConcordance extends VariantEvaluator { } determineStats(eval, validation); - + return interesting; // we don't capture any interesting sites } private void determineStats(VariantContext eval, VariantContext validation) { // determine concordance for eval data - if ( eval != null ) { + if (eval != null) { - for ( String sample : eval.getSampleNames() ) { + for (String sample : eval.getSampleNames()) { Genotype.Type called = eval.getGenotype(sample).getType(); Genotype.Type truth; - if ( !isValidVC(validation) || !validation.hasGenotype(sample) ) + if (!isValidVC(validation) || !validation.hasGenotype(sample)) truth = Genotype.Type.NO_CALL; else truth = validation.getGenotype(sample).getType(); SampleStats stats = concordanceStats.get(sample); - if ( stats == null ) + if (stats == null) throw new StingException("Sample " + sample + " has not been seen in a previous eval; this analysis module assumes that all samples are present in each variant context"); stats.concordance[truth.ordinal()][called.ordinal()]++; } @@ -183,9 +229,9 @@ public class GenotypeConcordance extends VariantEvaluator { Genotype.Type called = Genotype.Type.NO_CALL; - for ( String sample : validation.getSampleNames() ) { + for (String sample : validation.getSampleNames()) { SampleStats stats = concordanceStats.get(sample); - if ( stats == null ) + if (stats == null) continue; Genotype.Type truth = validation.getGenotype(sample).getType(); @@ -194,15 +240,15 @@ public class GenotypeConcordance extends VariantEvaluator { } // determine allele count concordance () - if ( isValidVC(validation) && validation.isPolymorphic() ) { + if (isValidVC(validation) && validation.isPolymorphic()) { int trueAlleleCount = 0; - for ( Allele a : validation.getAlternateAlleles() ) + for (Allele a : validation.getAlternateAlleles()) trueAlleleCount += validation.getChromosomeCount(a); - if ( !alleleCountStats.containsKey(trueAlleleCount) ) + if (!alleleCountStats.containsKey(trueAlleleCount)) alleleCountStats.put(trueAlleleCount, new FrequencyStats()); FrequencyStats stats = alleleCountStats.get(trueAlleleCount); - if ( eval != null ) + if (eval != null) stats.nFound++; else stats.nMissed++; @@ -215,7 +261,7 @@ public class GenotypeConcordance extends VariantEvaluator { private void createConcordanceTable(VariantContext vc) { concordanceStats = new HashMap(); - for ( String sample : vc.getSampleNames() ) + for (String sample : vc.getSampleNames()) concordanceStats.put(sample, new SampleStats()); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/MendelianViolationEvaluator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/MendelianViolationEvaluator.java index 83168d677..625a6d89e 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/MendelianViolationEvaluator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/MendelianViolationEvaluator.java @@ -6,6 +6,8 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; +import org.broadinstitute.sting.playground.utils.report.tags.Analysis; +import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; import org.broadinstitute.sting.utils.StingException; import java.util.List; @@ -15,36 +17,51 @@ import java.util.regex.Matcher; /** * Mendelian violation detection and counting - * + *

* a violation looks like: * Suppose dad = A/B and mom = C/D * The child can be [A or B] / [C or D]. * If the child doesn't match this, the site is a violation - * + *

* Some examples: - * + *

* mom = A/A, dad = C/C * child can be A/C only - * + *

* mom = A/C, dad = C/C * child can be A/C or C/C - * + *

* mom = A/C, dad = A/C * child can be A/A, A/C, C/C - * + *

* The easiest way to do this calculation is to: - * + *

* Get alleles for mom => A/B * Get alleles for dad => C/D * Make allowed genotypes for child: A/C, A/D, B/C, B/D * Check that the child is one of these. */ +@Analysis(name = "Mendelian Violation Evaluator", description = "Mendelian Violation Evaluator") public class MendelianViolationEvaluator extends VariantEvaluator { - long nVariants, nViolations; - TrioStructure trio; + + @DataPoint(name = "number_variants", description = "Number of mendelian variants found") + long nVariants; + + @DataPoint(name = "number_violations", description = "Number of mendelian violations found") + long nViolations; + + @DataPoint(description = "number of child hom ref calls where the parent was hom variant") + long KidHomRef_ParentHomVar; + @DataPoint(description = "number of child het calls where the parent was hom ref") + long KidHet_ParentsHomRef; + @DataPoint(description = "number of child het calls where the parent was hom variant") + long KidHet_ParentsHomVar; + @DataPoint(description = "number of child hom variant calls where the parent was hom ref") + long KidHomVar_ParentHomRef; + VariantEval2Walker parent; - long KidHomRef_ParentHomVar, KidHet_ParentsHomRef, KidHet_ParentsHomVar, KidHomVar_ParentHomRef; + TrioStructure trio; private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)"); @@ -54,7 +71,7 @@ public class MendelianViolationEvaluator extends VariantEvaluator { public static TrioStructure parseTrioDescription(String family) { Matcher m = FAMILY_PATTERN.matcher(family); - if ( m.matches() ) { + if (m.matches()) { TrioStructure trio = new TrioStructure(); //System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE); trio.mom = m.group(1); @@ -69,7 +86,7 @@ public class MendelianViolationEvaluator extends VariantEvaluator { public MendelianViolationEvaluator(VariantEval2Walker parent) { this.parent = parent; - if ( enabled() ) { + if (enabled()) { trio = parseTrioDescription(parent.FAMILY_STRUCTURE); parent.getLogger().debug(String.format("Found a family pattern: %s mom=%s dad=%s child=%s", parent.FAMILY_STRUCTURE, trio.mom, trio.dad, trio.child)); @@ -93,30 +110,30 @@ public class MendelianViolationEvaluator extends VariantEvaluator { } public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( vc.isBiallelic() && vc.hasGenotypes() ) { // todo -- currently limited to biallelic loci - Genotype momG = vc.getGenotype(trio.mom); - Genotype dadG = vc.getGenotype(trio.dad); + if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci + Genotype momG = vc.getGenotype(trio.mom); + Genotype dadG = vc.getGenotype(trio.dad); Genotype childG = vc.getGenotype(trio.child); - if ( includeGenotype(momG) && includeGenotype(dadG) && includeGenotype(childG) ) { + if (includeGenotype(momG) && includeGenotype(dadG) && includeGenotype(childG)) { nVariants++; - if ( momG == null || dadG == null || childG == null ) + if (momG == null || dadG == null || childG == null) throw new IllegalArgumentException(String.format("VariantContext didn't contain genotypes for expected trio members: mom=%s dad=%s child=%s", trio.mom, trio.dad, trio.child)); // all genotypes are good, so let's see if child is a violation - if ( isViolation(vc, momG, dadG, childG) ) { + if (isViolation(vc, momG, dadG, childG)) { nViolations++; String label; - if ( childG.isHomRef() && (momG.isHomVar() || dadG.isHomVar() )) { + if (childG.isHomRef() && (momG.isHomVar() || dadG.isHomVar())) { label = "KidHomRef_ParentHomVar"; KidHomRef_ParentHomVar++; - } else if (childG.isHet() && (momG.isHomRef() && dadG.isHomRef()) ) { + } else if (childG.isHet() && (momG.isHomRef() && dadG.isHomRef())) { label = "KidHet_ParentsHomRef"; KidHet_ParentsHomRef++; - } else if (childG.isHet() && (momG.isHomVar() && dadG.isHomVar()) ) { + } else if (childG.isHet() && (momG.isHomVar() && dadG.isHomVar())) { label = "KidHet_ParentsHomVar"; KidHet_ParentsHomVar++; } else if (childG.isHomVar() && (momG.isHomRef() || dadG.isHomRef())) { @@ -138,20 +155,20 @@ public class MendelianViolationEvaluator extends VariantEvaluator { return g.getNegLog10PError() > getQThreshold() && g.isCalled(); } - public static boolean isViolation(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG ) { + public static boolean isViolation(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG) { return isViolation(vc, momG.getAlleles(), dadG.getAlleles(), childG.getAlleles()); } - public static boolean isViolation(VariantContext vc, List momA, List dadA, List childA ) { + public static boolean isViolation(VariantContext vc, List momA, List dadA, List childA) { //VariantContext momVC = vc.subContextFromGenotypes(momG); //VariantContext dadVC = vc.subContextFromGenotypes(dadG); int i = 0; Genotype childG = new Genotype("kidG", childA); - for ( Allele momAllele : momA ) { - for ( Allele dadAllele : dadA ) { - if ( momAllele.isCalled() && dadAllele.isCalled() ) { + for (Allele momAllele : momA) { + for (Allele dadAllele : dadA) { + if (momAllele.isCalled() && dadAllele.isCalled()) { Genotype possibleChild = new Genotype("possibleGenotype" + i, Arrays.asList(momAllele, dadAllele)); - if ( childG.sameGenotype(possibleChild, false) ) { + if (childG.sameGenotype(possibleChild, false)) { return false; } } @@ -173,6 +190,7 @@ public class MendelianViolationEvaluator extends VariantEvaluator { Arrays.asList("nVariants", "nViolations", "KidHomRef_ParentHomVar", "KidHet_ParentsHomRef", "KidHet_ParentsHomVar", "KidHomVar_ParentHomRef"); // making it a table + public List getTableHeader() { return HEADER; } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/TiTvVariantEvaluator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/TiTvVariantEvaluator.java index edf126f1a..b6b59b5c4 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/TiTvVariantEvaluator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/TiTvVariantEvaluator.java @@ -4,20 +4,32 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.playground.utils.report.tags.Analysis; +import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; import java.util.List; import java.util.Arrays; +@Analysis(name = "Ti/Tv Variant Evaluator", description = "Ti/Tv Variant Evaluator") public class TiTvVariantEvaluator extends VariantEvaluator { - long nTi = 0, nTv = 0; - long nTiInStd = 0, nTvInStd = 0; + + @DataPoint(name = "ti_count", description = "number of transition loci") + long nTi = 0; + @DataPoint(name = "tv_count", description = "number of transversion loci") + long nTv = 0; + @DataPoint(name = "ti_count_std", description = "number of transition sites in the std") + long nTiInStd = 0; + @DataPoint(name = "tv_count_std", description = "number of transversion sites in the std") + long nTvInStd = 0; public TiTvVariantEvaluator(VariantEval2Walker parent) { // don't do anything } - public boolean enabled() { return true; } - + public boolean enabled() { + return true; + } + public String getName() { return "titv"; } @@ -27,18 +39,20 @@ public class TiTvVariantEvaluator extends VariantEvaluator { } public void updateTiTv(VariantContext vc, boolean updateStandard) { - if ( vc != null && vc.isSNP() && vc.isBiallelic() ) { - if ( vc.isTransition() ) { - if ( updateStandard ) nTiInStd++; else nTi++; + if (vc != null && vc.isSNP() && vc.isBiallelic()) { + if (vc.isTransition()) { + if (updateStandard) nTiInStd++; + else nTi++; } else { - if ( updateStandard ) nTvInStd++; else nTv++; + if (updateStandard) nTvInStd++; + else nTv++; } } } public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( vc1 != null ) updateTiTv(vc1, false); - if ( vc2 != null ) updateTiTv(vc2, true); + if (vc1 != null) updateTiTv(vc1, false); + if (vc2 != null) updateTiTv(vc2, true); //if ( vc1 == null && vc2 != null && vc2.isSNP() && vc2.isBiallelic() ) // System.out.printf("VC2 = %s%n", vc2); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java index aa600232c..dc221ee8c 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java @@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.playground.utils.report.tags.Analysis; import java.util.List; import java.util.Arrays; @@ -17,6 +18,7 @@ import java.util.Arrays; * This software is supplied without any warranty or guaranteed support whatsoever. Neither * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. */ +@Analysis(name = "Validation Rate", description = "Validation Rate") public class ValidationRate extends VariantEvaluator { // todo -- subset validation data by list of samples, if provided @@ -26,7 +28,9 @@ public class ValidationRate extends VariantEvaluator { class SiteStats { long nPoly = 0, nMono = 0, nNoCall = 0; - double polyPercent() { return 100 * rate(nPoly, nPoly + nMono + nNoCall); } + double polyPercent() { + return 100 * rate(nPoly, nPoly + nMono + nNoCall); + } } private SiteStats validationStats = new SiteStats(); @@ -68,11 +72,14 @@ public class ValidationRate extends VariantEvaluator { "PPV", "Sensitivity"); // making it a table + public List getTableHeader() { return HEADER; } - public boolean enabled() { return true; } + public boolean enabled() { + return true; + } public List> getTableRows() { return Arrays.asList(Arrays.asList(summaryLine().split(" "))); @@ -81,26 +88,25 @@ public class ValidationRate extends VariantEvaluator { public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { String interesting = null; - if ( validation != null && validation.hasGenotypes() && validation.isNotFiltered() ) { + if (validation != null && validation.hasGenotypes() && validation.isNotFiltered()) { SiteStats overlap; - if ( validation.isPolymorphic() ) { + if (validation.isPolymorphic()) { validationStats.nPoly++; overlap = evalOverlapAtPoly; - if ( eval == null || eval.isMonomorphic() ) + if (eval == null || eval.isMonomorphic()) interesting = "ValidationStatus=FN"; - } - else { + } else { validationStats.nMono++; overlap = evalOverlapAtMono; - if ( eval != null && eval.isPolymorphic() ) + if (eval != null && eval.isPolymorphic()) interesting = "ValidationStatus=FP"; } - if ( eval == null ) + if (eval == null) overlap.nNoCall++; - else if ( eval.isPolymorphic() ) + else if (eval.isPolymorphic()) overlap.nPoly++; else overlap.nMono++; diff --git a/java/src/org/broadinstitute/sting/playground/utils/report/ReportMarshaller.java b/java/src/org/broadinstitute/sting/playground/utils/report/ReportMarshaller.java index 5f15379ef..37b1deb8e 100644 --- a/java/src/org/broadinstitute/sting/playground/utils/report/ReportMarshaller.java +++ b/java/src/org/broadinstitute/sting/playground/utils/report/ReportMarshaller.java @@ -33,10 +33,7 @@ import org.broadinstitute.sting.utils.StingException; import java.io.*; import java.lang.reflect.Field; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; +import java.util.*; /** @@ -52,6 +49,7 @@ public class ReportMarshaller { // the aggregation of all our analyses private Node root; private File writeLocation; + /** * create a marshaled object * @@ -64,10 +62,8 @@ public class ReportMarshaller { } private void init(String reportName, File filename) { - root = new Node(reportName); - Node title= new Node("title"); - title.addChild(new Node(reportName)); - root.addChild(title); + root = new Node("report", reportName, "the overarching report object"); + root.addChild(new Node("title", reportName, "title of the report")); this.writeLocation = filename; } @@ -77,7 +73,7 @@ public class ReportMarshaller { * @param reportName the report name */ public ReportMarshaller(String reportName, File filename) { - init(reportName,filename); + init(reportName, filename); temp = createTemplate(); } @@ -101,12 +97,40 @@ public class ReportMarshaller { root.addChild(analysis); } + /** + * add an analysis module to the output source + * + * @param toMarshall the object to marshall + */ + public void write(List prependNodes, Object toMarshall) { + // Create a context to add data to + HashMap analysisMap = new HashMap(); + AnalysisModuleScanner moduleScanner = new AnalysisModuleScanner(toMarshall); + + Node analysis = addAnalysis(moduleScanner); + + analysis.addAllChildren(getParameterNodes(toMarshall, moduleScanner)); + analysis.addAllChildren(getDataPointNodes(toMarshall, moduleScanner)); + + // prepend the list of nodes passed in + Node currChild = null; + for (Node n : prependNodes) { + if (currChild == null) { + root.addChild(n); + currChild = n; + } else { + currChild.addChild(n); + currChild = n; + } + } + // add this analysis to the root node + if (currChild == null) root.addChild(analysis); + else currChild.addChild(analysis); + } + + private Node addAnalysis(AnalysisModuleScanner moduleScanner) { - Node analysis = new Node(moduleScanner.getAnalysis().name()); - Node description = new Node("description"); - description.createSubNode(moduleScanner.getAnalysis().description()); - analysis.addChild(description); - return analysis; + return new Node("analysis", moduleScanner.getAnalysis().name(), moduleScanner.getAnalysis().description()); } /** @@ -119,7 +143,9 @@ public class ReportMarshaller { private Collection getParameterNodes(Object toMarshall, AnalysisModuleScanner moduleScanner) { Collection nodes = new ArrayList(); for (Field f : moduleScanner.getParameters().keySet()) { - Node node = new Node(moduleScanner.getParameters().get(f).name().equals("") ? f.getName() : moduleScanner.getParameters().get(f).name()); + Node node = new Node("parameter", + moduleScanner.getParameters().get(f).name().equals("") ? f.getName() : moduleScanner.getParameters().get(f).name(), + moduleScanner.getParameters().get(f).description()); addChildNodeFromField(toMarshall, f, node); nodes.add(node); } @@ -136,14 +162,18 @@ public class ReportMarshaller { private Collection getDataPointNodes(Object toMarshall, AnalysisModuleScanner moduleScanner) { Collection nodes = new ArrayList(); for (Field f : moduleScanner.getData().keySet()) { - Node node = new Node(moduleScanner.getData().get(f).name().equals("") ? f.getName() : moduleScanner.getData().get(f).name()); + Node node = new Node("data_point", + moduleScanner.getData().get(f).name().equals("") ? f.getName() : moduleScanner.getData().get(f).name(), + moduleScanner.getData().get(f).description()); addChildNodeFromField(toMarshall, f, node); nodes.add(node); } return nodes; } - /** call the method to finalize the report */ + /** + * call the method to finalize the report + */ public void close() { Writer out = null; try { @@ -154,7 +184,7 @@ public class ReportMarshaller { try { // add the data to a map Map map = new HashMap(); - map.put("root",root); + map.put("root", root); temp.process(map, out); out.flush(); } catch (TemplateException e) { @@ -174,7 +204,7 @@ public class ReportMarshaller { cfg.setObjectWrapper(new DefaultObjectWrapper()); Template temp = null; try { - temp = cfg.getTemplate("machineReadable.ftl"); + temp = cfg.getTemplate("myTestTemp.ftl"); // TODO: obviously this has to be changed to a factory or something like that } catch (IOException e) { e.printStackTrace(); } @@ -183,9 +213,10 @@ public class ReportMarshaller { /** * helper method for adding a Node to the specified node, given the field + * * @param toMarshall the object which contains the specified field - * @param f the field - * @param node the node to add a child node to + * @param f the field + * @param node the node to add a child node to */ private static void addChildNodeFromField(Object toMarshall, Field f, Node node) { f.setAccessible(true); diff --git a/java/src/org/broadinstitute/sting/playground/utils/report/TableFormatter.java b/java/src/org/broadinstitute/sting/playground/utils/report/TableFormatter.java deleted file mode 100644 index 3527af51b..000000000 --- a/java/src/org/broadinstitute/sting/playground/utils/report/TableFormatter.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2010. The Broad Institute - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.playground.utils.report; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Map; - - -/** - * - * @author aaron - * - * Class TableFormatter - * - * a helper class for formatting a table of data. Given a collection, it formats it into rows and columns. - * If it's a Map, values are emited as key, value, key, value, so it may make sense to have colums set to 2. - * - * TODO: Needs some cleanup - * - */ -public class TableFormatter { - public Object tableEntries; // data storage - private int columns = 2; // default to two columns of data, overridden in the constructor - - - public TableFormatter(Collection tbl, int columns) { - tableEntries = tbl; - this.columns = columns; - } - - public TableFormatter(Object tbl) { - tableEntries = tbl; - } - - /** - * convert the underlying object to a list of lists, for convenient output - * @return a list of list of objects (a list for each row, then a list of row members) - */ - public List> toRows() { - if (tableEntries instanceof Collection) - return collectionToTable(((Collection)tableEntries)); - else if (tableEntries instanceof Map) - return mapToTable(((Map)tableEntries)); - else - throw new UnsupportedOperationException("underlying collection must be an instance of type Collection or of type Map"); - } - - private List> mapToTable(Map collection) { - ArrayList> list = new ArrayList>(); - for (Object obj : collection.keySet()) { - List currentRow = new ArrayList(); - currentRow.add(obj); - Object data = collection.get(obj); - currentRow.add(data.toString()); - - list.add(currentRow); - } - - return list; - } - - private List> collectionToTable(Collection collection) { - ArrayList> list = new ArrayList>(); - List currentRow = new ArrayList(); - for (Object obj : collection) { - if (currentRow.size() >= columns) { - list.add(currentRow); - currentRow = new ArrayList(); - } - currentRow.add(obj); - } - list.add(currentRow); - return list; - } - -} diff --git a/java/src/org/broadinstitute/sting/playground/utils/report/utils/ComplexDataUtils.java b/java/src/org/broadinstitute/sting/playground/utils/report/utils/ComplexDataUtils.java index 330372c5d..ec004f8a2 100644 --- a/java/src/org/broadinstitute/sting/playground/utils/report/utils/ComplexDataUtils.java +++ b/java/src/org/broadinstitute/sting/playground/utils/report/utils/ComplexDataUtils.java @@ -1,20 +1,22 @@ package org.broadinstitute.sting.playground.utils.report.utils; +import org.broadinstitute.sting.utils.Utils; + import java.util.*; /** - * - * @author aaron - * - * Class ComplexDataUtils - * - * This class contains methods and techniques for breaking down complex data in the output system + * @author aaron + *

+ * Class ComplexDataUtils + *

+ * This class contains methods and techniques for breaking down complex data in the output system */ public class ComplexDataUtils { /** * convert any string -> object pairing into a string keyed tree + * * @param obj the object * @return a mapping of the name to the associated value tree. All non-leaf nodes will be Strings */ @@ -22,41 +24,50 @@ public class ComplexDataUtils { Collection nodes = new ArrayList(); // the simplest case - if (obj == null) nodes.add(new Node("")); // throw new IllegalStateException("object is null"); + if (obj == null) + nodes.add(new Node("", "", "")); // throw new IllegalStateException("object is null"); else if (obj instanceof TableType) - nodes.add(tableToNode((TableType)obj, ((TableType)obj).getName())); + nodes.add(tableToNode((TableType) obj, ((TableType) obj).getName())); else if (obj instanceof Map) { - for (Object key : ((Map)obj).keySet()) { - Node keyNode = new Node(key.toString()); + for (Object key : ((Map) obj).keySet()) { + Node keyNode = new Node("key", key.toString(), "map key"); nodes.add(keyNode); - keyNode.addAllChildren(resolveObjects(((Map)obj).get(key))); + keyNode.addAllChildren(resolveObjects(((Map) obj).get(key))); } } else if (obj instanceof Collection) - nodes.addAll(listToNode((Collection)obj, "collection")); + nodes.addAll(listToNode((Collection) obj, "collection")); else if (obj.getClass().isArray()) - nodes.addAll(listToNode(Arrays.asList(obj),"array")); + nodes.addAll(listToNode(Arrays.asList(obj), "array")); else - nodes.add(new Node(obj.toString())); + nodes.add(new Node(obj.getClass().getSimpleName(), obj.toString(), "value")); return nodes; } /** * given a TableType object, make it into a tree using maps. + * * @param table the table type to convert into a map * @return */ private static Node tableToNode(TableType table, String name) { - Node root = new Node(name); + Node root = new Node("table", name, "Table"); Object[] rows = table.getRowKeys(); Object[] cols = table.getColumnKeys(); + + // add the columns names + Node col_names = new Node("col_names", "col_names", "the column names, ~ seperated", false); + for (Object col : cols) + col_names.addChild(new Node("col", col.toString(), "a column name", false)); + root.addChild(col_names); + for (int x = 0; x < table.getRowKeys().length; x++) { - Node row = new Node(rows[x].toString()); + Node row = new Node("row", rows[x].toString(), "a row"); root.addChild(row); - for (int y = 0; y < table.getRowKeys().length; y++) { - Node col = new Node(cols[y].toString()); + for (int y = 0; y < table.getColumnKeys().length; y++) { + Node col = new Node("column", cols[y].toString(), "a column"); row.addChild(col); - col.addChild(new Node(table.getCell(x,y))); + col.addChild(new Node("cell(" + x + "," + y + ")", table.getCell(x, y), "a cell")); } } return root; @@ -64,6 +75,7 @@ public class ComplexDataUtils { /** * given a Collection object, make it into a tree using maps. + * * @param coll the collection to iterate, and turn into a list * @return a mapping of String to Object */ @@ -71,11 +83,10 @@ public class ComplexDataUtils { Collection nodes = new ArrayList(); Iterator iter = coll.iterator(); for (int x = 0; x < coll.size(); x++) { - Node value = new Node(String.valueOf(x)); - value.addChild(new Node(iter.next().toString())); + Node value = new Node("column " + x, String.valueOf(x), "column"); + value.addChild(new Node("value " + x, iter.next().toString(), "value")); nodes.add(value); } return nodes; } - } diff --git a/java/src/org/broadinstitute/sting/playground/utils/report/utils/Node.java b/java/src/org/broadinstitute/sting/playground/utils/report/utils/Node.java index 6eb4f2491..4560c5c76 100644 --- a/java/src/org/broadinstitute/sting/playground/utils/report/utils/Node.java +++ b/java/src/org/broadinstitute/sting/playground/utils/report/utils/Node.java @@ -4,15 +4,27 @@ import java.util.Collection; import java.util.LinkedHashSet; /** -* a node, extracted using the new report output system. -*/ + * a node, extracted using the new report output system. + */ public class Node { - public String value; - public String description; // if one is available + public final String name; + public final String value; + public final String description; + public final boolean display; // is this node an output node, or a node for tracking internal data? true if output node public Collection children; - public Node(String value) { + public Node(String name, String value, String description) { this.value = value; + this.name = name; + this.description = description; + display = true; + } + + public Node(String name, String value, String description, boolean display) { + this.value = value; + this.name = name; + this.description = description; + this.display = display; } public void addChild(Node child) { @@ -25,20 +37,27 @@ public class Node { this.children.addAll(children); } - public Boolean getComplex() { return (children != null && children.size() > 0); } + public Boolean getComplex() { + return (children != null && children.size() > 0); + } /** * a convenience method for adding a new sub-node with the specified value + * * @param value the value of the sub-node */ - public void createSubNode(String value) { - addChild(new Node(value)); + public void createSubNode(String name, String value, String description) { + addChild(new Node(name, value, description)); } public String getValue() { return value; } + public String getName() { + return name; + } + public String getDescription() { return description; } @@ -46,4 +65,8 @@ public class Node { public Collection getChildren() { return children; } + + public boolean getDisplay() { + return display; + } }