From c89ba7b1a4d65fdbc02fd8d8c2826f2df2fe3722 Mon Sep 17 00:00:00 2001 From: depristo Date: Wed, 3 Feb 2010 16:03:19 +0000 Subject: [PATCH] improvements to variant eval 2. Now has titv calculations and mendelian violation detect support. we only make ~80 mendelian violations in 380K calls for the YRI trio, in case you are interested git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2768 348d0f76-0448-11de-a6fe-93d51630548a --- .../variantcontext/Genotype.java | 63 +++++- .../varianteval2/CountVariants.java | 19 +- .../varianteval2/DbSNPPercentage.java | 28 +-- .../MendelianViolationEvaluator.java | 191 ++++++++++++++++++ .../varianteval2/TiTvVariantEvaluator.java | 4 +- .../varianteval2/VariantEval2Walker.java | 89 +++++++- .../varianteval2/VariantEvaluator.java | 27 ++- 7 files changed, 371 insertions(+), 50 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/MendelianViolationEvaluator.java diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Genotype.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Genotype.java index 5daef835d..783b446ac 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Genotype.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Genotype.java @@ -24,12 +24,22 @@ public class Genotype extends AttributedObject { this(resolveAlleles(vc, alleles), sampleName, negLog10PError); } + public Genotype(VariantContext vc, List alleles, String sampleName) { + this(resolveAlleles(vc, alleles), sampleName); + } + public Genotype(List alleles, String sampleName, double negLog10PError) { setAlleles(alleles); setSampleName(sampleName); setNegLog10PError(negLog10PError); } + public Genotype(List alleles, String sampleName) { + setAlleles(alleles); + setSampleName(sampleName); + } + + /** * @return the alleles for this genotype */ @@ -46,6 +56,10 @@ public class Genotype extends AttributedObject { return al; } + public Allele getAllele(int i) { + return alleles.get(i); + } + private final static String ALLELE_SEPARATOR = "/"; public String getGenotypeString() { return Utils.join(ALLELE_SEPARATOR, getAllelesString()); @@ -89,8 +103,10 @@ public class Genotype extends AttributedObject { /** * @return true if all observed alleles are the same (regardless of whether they are ref or alt) */ - public boolean isHom() { return getType() == Type.HOM_REF || getType() == Type.HOM_VAR; } - + public boolean isHom() { return isHomRef() || isHomVar(); } + public boolean isHomRef() { return getType() == Type.HOM_REF; } + public boolean isHomVar() { return getType() == Type.HOM_VAR; } + /** * @return true if we're het (observed alleles differ) */ @@ -177,6 +193,47 @@ public class Genotype extends AttributedObject { } public String toString() { - return String.format("[GT: %s %s %s Q%.2f %s]", getSampleName(), getAlleles(), getType(), getNegLog10PError(), getAttributes()); + return String.format("[GT: %s %s %s Q%.2f %s]", getSampleName(), getAlleles(), getType(), 10 * getNegLog10PError(), getAttributes()); + } + + public String toBriefString() { + return String.format("%s:Q%.2f", getAlleles(), 10 * getNegLog10PError()); + } + + public boolean sameGenotype(Genotype other) { + return sameGenotype(other, true); + } + + public boolean sameGenotype(Genotype other, boolean ignorePhase) { + if ( getPloidy() != other.getPloidy() ) + return false; // gotta have the same number of allele to be equal for gods sake + + // algorithms are wildly different if phase is kept of ignored + if ( ignorePhase ) { + for ( int i = 0; i < getPloidy(); i++) { + Allele myAllele = getAllele(i); + Allele otherAllele = other.getAllele(i); + if ( ! myAllele.basesMatch(otherAllele) ) + return false; + } + } else { + List otherAlleles = other.getAlleles(); + for ( Allele myAllele : getAlleles() ) { + Allele alleleToRemove = null; + for ( Allele otherAllele : otherAlleles ) { + if ( myAllele.basesMatch(otherAllele) ) { + alleleToRemove = otherAllele; + break; + } + } + + if ( alleleToRemove != null ) + otherAlleles.remove(alleleToRemove); + else + return false; // we couldn't find our allele + } + } + + return true; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/CountVariants.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/CountVariants.java index 14a66b91f..87c636a8b 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/CountVariants.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/CountVariants.java @@ -26,17 +26,12 @@ public class CountVariants extends VariantEvaluator { long nHomRef = 0; long nHomVar = 0; - private double rate(long n) { - return n / (1.0 * Math.max(nProcessedLoci, 1)); + public CountVariants(VariantEval2Walker parent) { + // don't do anything } - private long inverseRate(long n) { - return n == 0 ? 0 : nProcessedLoci / Math.max(n, 1); - } - - private double ratio(long num, long denom) { - return ((double)num) / (Math.max(denom, 1)); - } + private double perLocusRate(long n) { return rate(n, nProcessedLoci); } + private long perLocusRInverseRate(long n) { return inverseRate(n, nProcessedLoci); } public String getName() { return "counter"; @@ -87,11 +82,11 @@ public class CountVariants extends VariantEvaluator { "%.2e %d %.2f " + "%.2f %d %.2f", nProcessedLoci, nCalledLoci, nRefLoci, nVariantLoci, - rate(nVariantLoci), inverseRate(nVariantLoci), + perLocusRate(nVariantLoci), perLocusRInverseRate(nVariantLoci), nSNPs, nDeletions, nInsertions, nComplex, nHomRef, nHets, nHomVar, - rate(nHets), inverseRate(nHets), ratio(nHets, nHomVar), - rate(nDeletions + nInsertions), inverseRate(nDeletions + nInsertions), ratio(nDeletions, nInsertions)); + perLocusRate(nHets), perLocusRInverseRate(nHets), ratio(nHets, nHomVar), + perLocusRate(nDeletions + nInsertions), perLocusRInverseRate(nDeletions + nInsertions), ratio(nDeletions, nInsertions)); } private static List HEADER = diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/DbSNPPercentage.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/DbSNPPercentage.java index 62db4c407..319a58a6f 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/DbSNPPercentage.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/DbSNPPercentage.java @@ -29,6 +29,10 @@ public class DbSNPPercentage extends VariantEvaluator { private long nSNPsAtdbSNPs = 0; private long nConcordant = 0; + public DbSNPPercentage(VariantEval2Walker parent) { + // don't do anything + } + public String getName() { return "dbsnp_percentage"; } @@ -86,28 +90,8 @@ public class DbSNPPercentage extends VariantEvaluator { * * @return */ - public double dbSNPRate() { - return nSNPsAtdbSNPs() / (1.0 * Math.max(nEvalSNPs(), 1)); - } - - public double concordanceRate() { - return nConcordant() / (1.0 * Math.max(nSNPsAtdbSNPs(), 1)); - } - -// public static Variation getFirstRealSNP(RODRecordList dbsnpList) { -// if (dbsnpList == null) -// return null; -// -// Variation dbsnp = null; -// for (ReferenceOrderedDatum d : dbsnpList) { -// if (((Variation) d).isSNP() && (! (d instanceof RodVCF) || ! ((RodVCF)d).isFiltered())) { -// dbsnp = (Variation)d; -// break; -// } -// } -// -// return dbsnp; -// } + public double dbSNPRate() { return rate(nSNPsAtdbSNPs(), nEvalSNPs()); } + public double concordanceRate() { return rate(nConcordant(), nSNPsAtdbSNPs()); } public void update2(VariantContext eval, VariantContext dbsnp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { boolean dbSNPIsGood = dbsnp != null && dbsnp.isSNP() && dbsnp.isNotFiltered(); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/MendelianViolationEvaluator.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/MendelianViolationEvaluator.java new file mode 100755 index 000000000..7cf90e40f --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/MendelianViolationEvaluator.java @@ -0,0 +1,191 @@ +package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2; + +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext; +import org.broadinstitute.sting.oneoffprojects.variantcontext.Genotype; +import org.broadinstitute.sting.oneoffprojects.variantcontext.Allele; +import org.broadinstitute.sting.utils.StingException; + +import java.util.List; +import java.util.Arrays; +import java.util.ArrayList; +import java.util.regex.Pattern; +import java.util.regex.Matcher; + +/** + * Mendelian violation detection and counting + * + * a violation looks like: + * Suppose dad = A/B and mom = C/D + * The child can be [A or B] / [C or D]. + * If the child doesn't match this, the site is a violation + * + * Some examples: + * + * mom = A/A, dad = C/C + * child can be A/C only + * + * mom = A/C, dad = C/C + * child can be A/C or C/C + * + * mom = A/C, dad = A/C + * child can be A/A, A/C, C/C + * + * The easiest way to do this calculation is to: + * + * Get alleles for mom => A/B + * Get alleles for dad => C/D + * Make allowed genotypes for child: A/C, A/D, B/C, B/D + * Check that the child is one of these. + */ +public class MendelianViolationEvaluator extends VariantEvaluator { + long nVariants, nViolations, nOverCalls, nUnderCalls; + String mom, dad, child; + VariantEval2Walker parent; + + private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)"); + + public MendelianViolationEvaluator(VariantEval2Walker parent) { + this.parent = parent; + + if ( enabled() ) { + Matcher m = FAMILY_PATTERN.matcher(parent.FAMILY_STRUCTURE); + if ( m.matches() ) { + //System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE); + mom = m.group(1); + dad = m.group(2); + child = m.group(3); + parent.getLogger().debug(String.format("Found a family pattern: %s mom=%s dad=%s child=%s", parent.FAMILY_STRUCTURE, mom, dad, child)); + + } else { + throw new IllegalArgumentException("Malformatted family structure string: " + parent.FAMILY_STRUCTURE + " required format is mom+dad=child"); + } + } + } + + private boolean enabled() { + return parent.FAMILY_STRUCTURE != null; + } + + private double getQThreshold() { + return parent.MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred + } + + public String getName() { + return "mendelian_violations"; + } + + public int getComparisonOrder() { + return 1; // we only need to see each eval track + } + + public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( vc.isBiallelic() && vc.hasGenotypes() ) { // todo -- currently limited to biallelic loci + nVariants++; + + Genotype momG = vc.getGenotype(mom); + Genotype dadG = vc.getGenotype(dad); + Genotype childG = vc.getGenotype(child); + + if ( momG.getNegLog10PError() > getQThreshold() && dadG.getNegLog10PError() > getQThreshold() && childG.getNegLog10PError() > getQThreshold() ) { + // all genotypes are good, so let's see if child is a violation + + if ( isViolation(vc, momG, dadG, childG) ) { + nViolations++; + + String label = null; + switch ( getViolationType( vc, momG, dadG, childG ) ) { + case UNDER_CALL: + nUnderCalls++; + label = "under_called"; + break; + case OVER_CALL: + nOverCalls++; + label = "over_called"; + break; + default: + throw new StingException("BUG: unexpected violation type at " + vc); + + } + + String why = String.format("Mendelian violation %s: at %s m=%s d=%s c=%s", label, vc.getLocation(), momG.toBriefString(), dadG.toBriefString(), childG.toBriefString()); + addInterestingSite(why , vc); + } + } + } + } + + /** + * Are we under or over calling? + * + * Assuming this is a bialleic locus, we then have 2 alleles A and B. There are really two types of violations: + * + * Undercall: where the child is A/A but parent genotypes imply that child must carry at least one B allele + * Overall: where the child carries a B allele but this B allele couldn't have been inherited from either parent + * + * The way to determine this is to look at mom and dad separately. If the child doesn't carry at least one + * allele from each parent, it's an under calls. Otherwise it's an overcall. + */ + public ViolationType getViolationType(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG ) { + switch ( childG.getType() ) { + case HOM_REF: + return ViolationType.UNDER_CALL; // if you have to undercalled as a hom ref child + case HET: + // the only two violations of a het is where both parents are hom. If they are hom ref, you overcalled, + // otherwise you undercalled + return momG.isHomRef() ? ViolationType.OVER_CALL : ViolationType.UNDER_CALL; + case HOM_VAR: + return ViolationType.OVER_CALL; // if you have to overcalled as a hom var child + default: + throw new StingException("BUG: unexpected child genotype class " + childG); + } + } + + private enum ViolationType { + UNDER_CALL, OVER_CALL + } + + public boolean isViolation(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG ) { + VariantContext momVC = vc.subContextFromGenotypes(momG); + VariantContext dadVC = vc.subContextFromGenotypes(dadG); + int i = 0; + for ( Allele momAllele : momVC.getAlleles() ) { + for ( Allele dadAllele : dadVC.getAlleles() ) { + Genotype possibleChild = new Genotype(Arrays.asList(momAllele, dadAllele), "possibleGenotype" + i); + if ( childG.sameGenotype(possibleChild, false) ) { + return false; + } + } + } + + return true; + } + + public String toString() { + return getName() + ": " + summaryLine(); + } + + private String summaryLine() { + return String.format("%d %d %.2e %d %.2e %.2f %d %.2e %.2f", + nVariants, nViolations, rate(nViolations, nVariants), + nOverCalls, rate(nOverCalls, nVariants), ratio(nOverCalls, nViolations), + nUnderCalls, rate(nUnderCalls, nVariants), ratio(nUnderCalls, nViolations)); + } + + private static List HEADER = + Arrays.asList("nVariants", + "nViolations", "violationRate", + "nOverCalls", "overCallRate", "overCallFraction", + "nUnderCalls", "underCallRate", "underCallFraction"); + + // making it a table + public List getTableHeader() { + return HEADER; + } + + public List> getTableRows() { + return Arrays.asList(Arrays.asList(summaryLine().split(" "))); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/TiTvVariantEvaluator.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/TiTvVariantEvaluator.java index f72fc40eb..1fab3ccc0 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/TiTvVariantEvaluator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/TiTvVariantEvaluator.java @@ -14,8 +14,8 @@ public class TiTvVariantEvaluator extends VariantEvaluator { long nTi = 0, nTv = 0; long nTiInStd = 0, nTvInStd = 0; - private double ratio(long num, long denom) { - return ((double)num) / (Math.max(denom, 1)); + public TiTvVariantEvaluator(VariantEval2Walker parent) { + // don't do anything } public String getName() { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEval2Walker.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEval2Walker.java index ed83fbac5..37f64434e 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEval2Walker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEval2Walker.java @@ -10,13 +10,46 @@ import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext; import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContextAdaptors; import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContextUtils; +import org.apache.log4j.Logger; import java.util.*; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; + + +// +// todo -- write a simple column table system and have the evaluators return this instead of the list> objects +// todo -- site frequency spectrum eval (freq. of variants in eval as a function of their AC and AN numbers) +// todo -- multiple sample concordance tool (genotypes in eval vs. genotypes in truth) +// todo -- allele freqeuncy discovery tool (FREQ in true vs. discovery counts in eval). Needs to process subset of samples in true (pools) +// todo -- clustered SNP counter +// todo -- HWEs +// todo -- Validation data analysis from VE1? What is it and should we transition it over? +// todo -- indel metrics [count of sizes in/del should be in CountVariants] +// todo -- create JEXL context implementing object that simply looks up values for JEXL evaluations. Throws error for unknown fields +// + +// +// Todo -- should really include argument parsing @annotations from subclass in this walker. Very +// todo -- useful general capability. Right now you need to add arguments to VariantEval2 to handle new +// todo -- evaluation arguments (which is better than passing a string!) +// + +// +// todo -- the whole organization only supports a single eval x comp evaluation. We need to instantiate +// todo -- new contexts for each comparison object too! +// /** * Test routine for new VariantContext object */ public class VariantEval2Walker extends RodWalker { + // -------------------------------------------------------------------------------------------------------------- + // + // walker arguments + // + // -------------------------------------------------------------------------------------------------------------- + // todo -- add doc string @Argument(shortName="select", doc="", required=false) protected String[] SELECT_EXPS = {"QUAL > 500.0", "HARD_TO_VALIDATE==1", "GATK_STANDARD==1"}; @@ -28,6 +61,27 @@ public class VariantEval2Walker extends RodWalker { @Argument(shortName="known", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false) protected String[] KNOWN_NAMES = {"dbsnp"}; + // + // Arguments for Mendelian Violation calculations + // + @Argument(shortName="family", doc="If provided, genotypes in will be examined for mendelian violations: this argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) + protected String FAMILY_STRUCTURE; + + @Argument(shortName="MVQ", fullName="MendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false) + protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50; + + @Argument(shortName="PI", fullName="PrintInterestingSites", doc="If provided, interesting sites in the unselected, called set will be printed", required=false) + protected boolean PRINT_INTERESTING_SITES = false; + +// @Argument(shortName="PIA", fullName="PrintAllInterestingSites", doc="If provided, interesting sites will be printed for all sets. Verbose", required=false) +// protected boolean PRINT_ALL_INTERESTING_SITES = false; + + // -------------------------------------------------------------------------------------------------------------- + // + // private walker data + // + // -------------------------------------------------------------------------------------------------------------- + /** private class holding all of the information about a single evaluation group (e.g., for eval ROD) */ private class EvaluationContext extends HashMap> { // useful for typing @@ -60,6 +114,7 @@ public class VariantEval2Walker extends RodWalker { // initialize // // -------------------------------------------------------------------------------------------------------------- + public void initialize() { determineAllEvalations(); List selectExps = VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS); @@ -82,7 +137,7 @@ public class VariantEval2Walker extends RodWalker { private void determineAllEvalations() { evaluationClasses = PackageUtils.getClassesImplementingInterface(VariantEvaluator.class); - for ( VariantEvaluator e : instantiateEvalationsSet() ) { + for ( VariantEvaluator e : instantiateEvalationsSet(false, null) ) { // for collecting purposes variantEvaluationNames.add(e.getName()); logger.debug("Including VariantEvaluator " + e.getName() + " of class " + e.getClass()); @@ -91,15 +146,25 @@ public class VariantEval2Walker extends RodWalker { Collections.sort(variantEvaluationNames); } - private Set instantiateEvalationsSet() { + private Set instantiateEvalationsSet(boolean baseline, String name) { Set evals = new HashSet(); + Object[] args = new Object[]{this}; + Class[] argTypes = new Class[]{this.getClass()}; + for ( Class c : evaluationClasses ) { try { - evals.add((VariantEvaluator) c.newInstance()); + Constructor constructor = c.getConstructor(argTypes); + VariantEvaluator eval = (VariantEvaluator)constructor.newInstance(args); + if ( baseline ) eval.printInterestingSites(name); + evals.add(eval); } catch (InstantiationException e) { - throw new StingException(String.format("Cannot instantiate annotation class '%s': must be concrete class", c.getSimpleName())); + throw new StingException(String.format("Cannot instantiate class '%s': must be concrete class", c.getSimpleName())); + } catch (NoSuchMethodException e) { + throw new StingException(String.format("Cannot find expected constructor for class '%s': must have constructor accepting a single VariantEval2Walker object", c.getSimpleName())); } catch (IllegalAccessException e) { - throw new StingException(String.format("Cannot instantiate annotation class '%s': must have no-arg constructor", c.getSimpleName())); + throw new StingException(String.format("Cannot instantiate class '%s':", c.getSimpleName())); + } catch (InvocationTargetException e) { + throw new StingException(String.format("Cannot instantiate class '%s':", c.getSimpleName())); } } @@ -111,7 +176,8 @@ public class VariantEval2Walker extends RodWalker { for ( String filteredName : Arrays.asList(RAW_SET_NAME, RETAINED_SET_NAME, FILTERED_SET_NAME) ) { for ( String subname : Arrays.asList(ALL_SET_NAME, KNOWN_SET_NAME, NOVEL_SET_NAME) ) { - group.put(subname + "." + filteredName, instantiateEvalationsSet()); + String name = subname + "." + filteredName; + group.put(name, instantiateEvalationsSet(subname == ALL_SET_NAME && filteredName == RETAINED_SET_NAME, trackName + "." + (selectExp == null ? "all" : selectExp.name) + "." + name)); } } @@ -146,7 +212,8 @@ public class VariantEval2Walker extends RodWalker { // now call the single or paired update function switch ( evaluation.getComparisonOrder() ) { case 1: - if ( evalWantsVC && vc != null ) evaluation.update1(vc, tracker, ref, context); + if ( evalWantsVC && vc != null ) + evaluation.update1(vc, tracker, ref, context); break; case 2: for ( VariantContext comp : comps.values() ) { @@ -193,6 +260,8 @@ public class VariantEval2Walker extends RodWalker { comps.put(compName, getVariantContext(compName, tracker, context)); } + // todo -- remove me when the loop works correctly for comparisons of eval x comp for each comp + if ( comps.size() > 1 ) throw new StingException("VariantEval2 currently only supports comparisons of N eval tracks vs. a single comparison track. Yes, I know..."); return comps; } @@ -314,14 +383,16 @@ public class VariantEval2Walker extends RodWalker { String keyWord = contextName + "." + evalSubgroupName; if ( first ) { - out.printf("%20s %s %s%n", evalName, formatKeyword(CONTEXT_HEADER), Utils.join("\t\t", eval.getTableHeader())); + out.printf("%20s %s %s%n", evalName, formatKeyword(CONTEXT_HEADER), Utils.join("\t", eval.getTableHeader())); first = false; } for ( List row : eval.getTableRows() ) - out.printf("%20s %s %s%n", evalName, formatKeyword(keyWord), Utils.join("\t\t", row)); + out.printf("%20s %s %s%n", evalName, formatKeyword(keyWord), Utils.join("\t", row)); } } } } + + protected Logger getLogger() { return logger; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEvaluator.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEvaluator.java index 35fd84af2..522d1a538 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEvaluator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEvaluator.java @@ -16,7 +16,8 @@ import java.util.ArrayList; * To change this template use File | Settings | File Templates. */ public abstract class VariantEvaluator { - protected boolean accumulateInterestingSites = false; + protected boolean accumulateInterestingSites = false, printInterestingSites = false; + protected String interestingSitePrefix = null; protected boolean processedASite = false; protected List interestingSites = new ArrayList(); @@ -24,9 +25,15 @@ public abstract class VariantEvaluator { // do we want to keep track of things that are interesting public void accumulateInterestingSites(boolean enable) { accumulateInterestingSites = enable; } + public void printInterestingSites(String prefix) { printInterestingSites = true; interestingSitePrefix = prefix; } public boolean isAccumulatingInterestingSites() { return accumulateInterestingSites; } public List getInterestingSites() { return interestingSites; } - protected void addInterestingSite(String why, VariantContext vc) { interestingSites.add(vc); } + protected void addInterestingSite(String why, VariantContext vc) { + if ( accumulateInterestingSites ) + interestingSites.add(vc); + if ( printInterestingSites ) + System.out.printf("%40s %s%n", interestingSitePrefix, why); + } public boolean processedAnySites() { return processedASite; } protected void markSiteAsProcessed() { processedASite = true; } @@ -46,4 +53,20 @@ public abstract class VariantEvaluator { // making it a table public abstract List getTableHeader(); public abstract List> getTableRows(); + + // + // useful common utility routines + // + protected double rate(long n, long d) { + return n / (1.0 * Math.max(d, 1)); + } + + protected long inverseRate(long n, long d) { + return n == 0 ? 0 : d / Math.max(n, 1); + } + + protected double ratio(long num, long denom) { + return ((double)num) / (Math.max(denom, 1)); + } + }