From 8938a4146de85009e30966350435c65691b5af98 Mon Sep 17 00:00:00 2001 From: depristo Date: Fri, 5 Feb 2010 13:37:04 +0000 Subject: [PATCH] moving varianteval2 to it's own dir git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2784 348d0f76-0448-11de-a6fe-93d51630548a --- .../varianteval2/CountVariants.java | 108 ----- .../varianteval2/DbSNPPercentage.java | 110 ----- .../MendelianViolationEvaluator.java | 191 --------- .../varianteval2/TiTvVariantEvaluator.java | 65 --- .../varianteval2/VariantEval2Walker.java | 398 ------------------ .../varianteval2/VariantEvaluator.java | 72 ---- 6 files changed, 944 deletions(-) delete mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/CountVariants.java delete mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/DbSNPPercentage.java delete mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/MendelianViolationEvaluator.java delete mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/TiTvVariantEvaluator.java delete mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEval2Walker.java delete mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEvaluator.java diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/CountVariants.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/CountVariants.java deleted file mode 100755 index 87c636a8b..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/CountVariants.java +++ /dev/null @@ -1,108 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2; - -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext; -import org.broadinstitute.sting.oneoffprojects.variantcontext.Genotype; -import org.broadinstitute.sting.utils.StingException; - -import java.util.List; -import java.util.Arrays; - -public class CountVariants extends VariantEvaluator { - long nProcessedLoci = 0; - long nCalledLoci = 0; - long nVariantLoci = 0; - long nRefLoci = 0; - - long nSNPs = 0; - long nInsertions = 0; - long nDeletions = 0; - long nComplex = 0; - - long nNoCalls = 0; - long nHets = 0; - long nHomRef = 0; - long nHomVar = 0; - - public CountVariants(VariantEval2Walker parent) { - // don't do anything - } - - private double perLocusRate(long n) { return rate(n, nProcessedLoci); } - private long perLocusRInverseRate(long n) { return inverseRate(n, nProcessedLoci); } - - public String getName() { - return "counter"; - } - - public int getComparisonOrder() { - return 1; // we only need to see each eval track - } - - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - nProcessedLoci += context.getSkippedBases() + 1; - } - - public void update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - //nProcessedLoci++; - nCalledLoci++; - - if ( vc1.isVariant() ) nVariantLoci++; - switch ( vc1.getType() ) { - case NO_VARIATION: nRefLoci++; break; - case SNP: nSNPs++; break; - case INDEL: - if ( vc1.isInsertion() ) nInsertions++; else nDeletions++; - break; - case MIXED: nComplex++; break; - } - - for ( Genotype g : vc1.getGenotypes().values() ) { - switch ( g.getType() ) { - case NO_CALL: nNoCalls++; break; - case HOM_REF: nHomRef++; break; - case HET: nHets++; break; - case HOM_VAR: nHomVar++; break; - default: throw new StingException("BUG: Unexpected genotype type: " + g); - } - } - } - - public String toString() { - return getName() + ": " + summaryLine(); - } - - private String summaryLine() { - return String.format("%d %d %d %d " + - "%.2e %d " + - "%d %d %d %d " + - "%d %d %d " + - "%.2e %d %.2f " + - "%.2f %d %.2f", - nProcessedLoci, nCalledLoci, nRefLoci, nVariantLoci, - perLocusRate(nVariantLoci), perLocusRInverseRate(nVariantLoci), - nSNPs, nDeletions, nInsertions, nComplex, - nHomRef, nHets, nHomVar, - perLocusRate(nHets), perLocusRInverseRate(nHets), ratio(nHets, nHomVar), - perLocusRate(nDeletions + nInsertions), perLocusRInverseRate(nDeletions + nInsertions), ratio(nDeletions, nInsertions)); - } - - private static List HEADER = - Arrays.asList("nProcessedLoci", "nCalledLoci", "nRefLoci", "nVariantLoci", - "variantRate", "variantRatePerBp", - "nSNPs", "nDeletions", "nInsertions", "nComplex", - "nHomRefGenotypes", "nHetGenotypes", "nHomVarGenotypes", - "heterozygosity", "heterozygosityPerBp", "hetHomRatio", - "indelRate", "indelRatePerBp", "deletionInsertionRatio"); - - // making it a table - public List getTableHeader() { - return HEADER; - } - - public List> getTableRows() { - return Arrays.asList(Arrays.asList(summaryLine().split(" "))); - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/DbSNPPercentage.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/DbSNPPercentage.java deleted file mode 100755 index 319a58a6f..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/DbSNPPercentage.java +++ /dev/null @@ -1,110 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.Variation; -import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext; -import org.broadinstitute.sting.oneoffprojects.variantcontext.Genotype; -import org.broadinstitute.sting.oneoffprojects.variantcontext.Allele; - -import java.util.ArrayList; -import java.util.List; -import java.util.Arrays; - -/** - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - *

- * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - */ -public class DbSNPPercentage extends VariantEvaluator { - private long nDBSNPs = 0; - private long nEvalSNPs = 0; - private long nSNPsAtdbSNPs = 0; - private long nConcordant = 0; - - public DbSNPPercentage(VariantEval2Walker parent) { - // don't do anything - } - - public String getName() { - return "dbsnp_percentage"; - } - - public int getComparisonOrder() { - return 2; // we need to see each eval track and each comp track - } - - public long nDBSNPs() { return nDBSNPs; } - public long nEvalSNPs() { return nEvalSNPs; } - public long nSNPsAtdbSNPs() { return nSNPsAtdbSNPs; } - public long nConcordant() { return nConcordant; } - public long nNovelSites() { return Math.abs(nEvalSNPs() - nSNPsAtdbSNPs()); } - - public String toString() { - return getName() + ": " + summaryLine(); - } - - private String summaryLine() { - return String.format("%d %d %d %d %d %.2f %.2f", - nDBSNPs(), nEvalSNPs(), nSNPsAtdbSNPs(), nConcordant(), nNovelSites(), 100 * dbSNPRate(), 100 * concordanceRate()); - } - - private static List HEADER = - Arrays.asList("n_dbsnps", "n_eval_snps", "n_overlapping_snps", "n_concordant", "n_novel_snps", "dbsnp_rate", "concordance_rate"); - - // making it a table - public List getTableHeader() { - return HEADER; - } - - public List> getTableRows() { - return Arrays.asList(Arrays.asList(summaryLine().split(" "))); - } - - /** - * Returns true if every allele in eval is also in dbsnp - * - * @param eval - * @param dbsnp - * @return - */ - public boolean discordantP(VariantContext eval, VariantContext dbsnp ) { - for ( Allele a : eval.getAlleles() ) { - if ( ! dbsnp.hasAllele(a, true) ) - return true; - } - - return false; - } - - - /** - * What fraction of the evaluated site variants were also found in the db? - * - * @return - */ - public double dbSNPRate() { return rate(nSNPsAtdbSNPs(), nEvalSNPs()); } - public double concordanceRate() { return rate(nConcordant(), nSNPsAtdbSNPs()); } - - public void update2(VariantContext eval, VariantContext dbsnp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - boolean dbSNPIsGood = dbsnp != null && dbsnp.isSNP() && dbsnp.isNotFiltered(); - boolean evalIsGood = eval != null && eval.isSNP(); - - if ( dbSNPIsGood ) nDBSNPs++; // count the number of dbSNP events - if ( evalIsGood ) nEvalSNPs++; // count the number of dbSNP events - - if ( dbSNPIsGood && evalIsGood ) { - nSNPsAtdbSNPs++; - - if ( ! discordantP(eval, dbsnp) ) // count whether we're concordant or not with the dbSNP value - nConcordant++; - } - } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/MendelianViolationEvaluator.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/MendelianViolationEvaluator.java deleted file mode 100755 index 274a447cd..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/MendelianViolationEvaluator.java +++ /dev/null @@ -1,191 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2; - -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext; -import org.broadinstitute.sting.oneoffprojects.variantcontext.Genotype; -import org.broadinstitute.sting.oneoffprojects.variantcontext.Allele; -import org.broadinstitute.sting.utils.StingException; - -import java.util.List; -import java.util.Arrays; -import java.util.ArrayList; -import java.util.regex.Pattern; -import java.util.regex.Matcher; - -/** - * Mendelian violation detection and counting - * - * a violation looks like: - * Suppose dad = A/B and mom = C/D - * The child can be [A or B] / [C or D]. - * If the child doesn't match this, the site is a violation - * - * Some examples: - * - * mom = A/A, dad = C/C - * child can be A/C only - * - * mom = A/C, dad = C/C - * child can be A/C or C/C - * - * mom = A/C, dad = A/C - * child can be A/A, A/C, C/C - * - * The easiest way to do this calculation is to: - * - * Get alleles for mom => A/B - * Get alleles for dad => C/D - * Make allowed genotypes for child: A/C, A/D, B/C, B/D - * Check that the child is one of these. - */ -public class MendelianViolationEvaluator extends VariantEvaluator { - long nVariants, nViolations, nOverCalls, nUnderCalls; - String mom, dad, child; - VariantEval2Walker parent; - - private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)"); - - public MendelianViolationEvaluator(VariantEval2Walker parent) { - this.parent = parent; - - if ( enabled() ) { - Matcher m = FAMILY_PATTERN.matcher(parent.FAMILY_STRUCTURE); - if ( m.matches() ) { - //System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE); - mom = m.group(1); - dad = m.group(2); - child = m.group(3); - parent.getLogger().debug(String.format("Found a family pattern: %s mom=%s dad=%s child=%s", parent.FAMILY_STRUCTURE, mom, dad, child)); - - } else { - throw new IllegalArgumentException("Malformatted family structure string: " + parent.FAMILY_STRUCTURE + " required format is mom+dad=child"); - } - } - } - - private boolean enabled() { - return parent.FAMILY_STRUCTURE != null; - } - - private double getQThreshold() { - return parent.MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred - } - - public String getName() { - return "mendelian_violations"; - } - - public int getComparisonOrder() { - return 1; // we only need to see each eval track - } - - public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( vc.isBiallelic() && vc.hasGenotypes() ) { // todo -- currently limited to biallelic loci - nVariants++; - - Genotype momG = vc.getGenotype(mom); - Genotype dadG = vc.getGenotype(dad); - Genotype childG = vc.getGenotype(child); - - if ( momG.getNegLog10PError() > getQThreshold() && dadG.getNegLog10PError() > getQThreshold() && childG.getNegLog10PError() > getQThreshold() ) { - // all genotypes are good, so let's see if child is a violation - - if ( isViolation(vc, momG, dadG, childG) ) { - nViolations++; - - String label = null; - switch ( getViolationType( vc, momG, dadG, childG ) ) { - case UNDER_CALL: - nUnderCalls++; - label = "under_called"; - break; - case OVER_CALL: - nOverCalls++; - label = "over_called"; - break; - default: - throw new StingException("BUG: unexpected violation type at " + vc); - - } - - String why = String.format("Mendelian violation %s: at %s m=%s d=%s c=%s", label, vc.getLocation(), momG.toBriefString(), dadG.toBriefString(), childG.toBriefString()); - addInterestingSite(why , vc); - } - } - } - } - - /** - * Are we under or over calling? - * - * Assuming this is a bialleic locus, we then have 2 alleles A and B. There are really two types of violations: - * - * Undercall: where the child is A/A but parent genotypes imply that child must carry at least one B allele - * Overall: where the child carries a B allele but this B allele couldn't have been inherited from either parent - * - * The way to determine this is to look at mom and dad separately. If the child doesn't carry at least one - * allele from each parent, it's an under calls. Otherwise it's an overcall. - */ - public ViolationType getViolationType(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG ) { - switch ( childG.getType() ) { - case HOM_REF: - return ViolationType.UNDER_CALL; // if you have to undercalled as a hom ref child - case HET: - // the only two violations of a het is where both parents are hom. If they are hom ref, you overcalled, - // otherwise you undercalled - return momG.isHomRef() ? ViolationType.OVER_CALL : ViolationType.UNDER_CALL; - case HOM_VAR: - return ViolationType.OVER_CALL; // if you have to overcalled as a hom var child - default: - throw new StingException("BUG: unexpected child genotype class " + childG); - } - } - - private enum ViolationType { - UNDER_CALL, OVER_CALL - } - - public boolean isViolation(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG ) { - VariantContext momVC = vc.subContextFromGenotypes(momG); - VariantContext dadVC = vc.subContextFromGenotypes(dadG); - int i = 0; - for ( Allele momAllele : momVC.getAlleles() ) { - for ( Allele dadAllele : dadVC.getAlleles() ) { - Genotype possibleChild = new Genotype("possibleGenotype" + i, Arrays.asList(momAllele, dadAllele)); - if ( childG.sameGenotype(possibleChild, false) ) { - return false; - } - } - } - - return true; - } - - public String toString() { - return getName() + ": " + summaryLine(); - } - - private String summaryLine() { - return String.format("%d %d %.2e %d %.2e %.2f %d %.2e %.2f", - nVariants, nViolations, rate(nViolations, nVariants), - nOverCalls, rate(nOverCalls, nVariants), ratio(nOverCalls, nViolations), - nUnderCalls, rate(nUnderCalls, nVariants), ratio(nUnderCalls, nViolations)); - } - - private static List HEADER = - Arrays.asList("nVariants", - "nViolations", "violationRate", - "nOverCalls", "overCallRate", "overCallFraction", - "nUnderCalls", "underCallRate", "underCallFraction"); - - // making it a table - public List getTableHeader() { - return HEADER; - } - - public List> getTableRows() { - return Arrays.asList(Arrays.asList(summaryLine().split(" "))); - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/TiTvVariantEvaluator.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/TiTvVariantEvaluator.java deleted file mode 100755 index 1fab3ccc0..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/TiTvVariantEvaluator.java +++ /dev/null @@ -1,65 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2; - -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext; -import org.broadinstitute.sting.oneoffprojects.variantcontext.Genotype; -import org.broadinstitute.sting.utils.StingException; - -import java.util.List; -import java.util.Arrays; - -public class TiTvVariantEvaluator extends VariantEvaluator { - long nTi = 0, nTv = 0; - long nTiInStd = 0, nTvInStd = 0; - - public TiTvVariantEvaluator(VariantEval2Walker parent) { - // don't do anything - } - - public String getName() { - return "titv"; - } - - public int getComparisonOrder() { - return 2; // we only need to see each eval track - } - - public void updateTiTv(VariantContext vc, boolean updateStandard) { - if ( vc != null && vc.isSNP() && vc.isBiallelic() ) { - if ( vc.isTransition() ) { - if ( updateStandard ) nTiInStd++; else nTi++; - } else { - if ( updateStandard ) nTvInStd++; else nTv++; - } - } - } - - public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( vc1 != null ) updateTiTv(vc1, false); - if ( vc2 != null ) updateTiTv(vc2, true); - } - - public String toString() { - return getName() + ": " + summaryLine(); - } - - private String summaryLine() { - return String.format("%d %d %.2f %d %d %.2f", - nTi, nTv, ratio(nTi, nTv), - nTiInStd, nTvInStd, ratio(nTiInStd, nTvInStd)); - } - - private static List HEADER = - Arrays.asList("nTi", "nTv", "TiTvRatio", "nTiStandard", "nTvStandard", "TiTvRatioStandard"); - - // making it a table - public List getTableHeader() { - return HEADER; - } - - public List> getTableRows() { - return Arrays.asList(Arrays.asList(summaryLine().split(" "))); - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEval2Walker.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEval2Walker.java deleted file mode 100755 index 1c00f7d83..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEval2Walker.java +++ /dev/null @@ -1,398 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext; -import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContextAdaptors; -import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContextUtils; -import org.apache.log4j.Logger; - -import java.util.*; -import java.lang.reflect.Constructor; -import java.lang.reflect.InvocationTargetException; - - -// -// todo -- write a simple column table system and have the evaluators return this instead of the list> objects -// todo -- site frequency spectrum eval (freq. of variants in eval as a function of their AC and AN numbers) -// todo -- multiple sample concordance tool (genotypes in eval vs. genotypes in truth) -// todo -- allele freqeuncy discovery tool (FREQ in true vs. discovery counts in eval). Needs to process subset of samples in true (pools) -// todo -- clustered SNP counter -// todo -- HWEs -// todo -- Validation data analysis from VE1? What is it and should we transition it over? -// todo -- indel metrics [count of sizes in/del should be in CountVariants] -// todo -- create JEXL context implementing object that simply looks up values for JEXL evaluations. Throws error for unknown fields -// - -// -// Todo -- should really include argument parsing @annotations from subclass in this walker. Very -// todo -- useful general capability. Right now you need to add arguments to VariantEval2 to handle new -// todo -- evaluation arguments (which is better than passing a string!) -// - -// -// todo -- the whole organization only supports a single eval x comp evaluation. We need to instantiate -// todo -- new contexts for each comparison object too! -// - -/** - * Test routine for new VariantContext object - */ -public class VariantEval2Walker extends RodWalker { - // -------------------------------------------------------------------------------------------------------------- - // - // walker arguments - // - // -------------------------------------------------------------------------------------------------------------- - - // todo -- add doc string - @Argument(shortName="select", doc="", required=false) - protected String[] SELECT_EXPS = {"QUAL > 500.0", "HARD_TO_VALIDATE==1", "GATK_STANDARD==1"}; - - // todo -- add doc string - @Argument(shortName="selectName", doc="", required=false) - protected String[] SELECT_NAMES = {"q500plus", "low_mapq", "gatk_std_filters"}; - - @Argument(shortName="known", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false) - protected String[] KNOWN_NAMES = {"dbsnp"}; - - // - // Arguments for Mendelian Violation calculations - // - @Argument(shortName="family", doc="If provided, genotypes in will be examined for mendelian violations: this argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) - protected String FAMILY_STRUCTURE; - - @Argument(shortName="MVQ", fullName="MendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false) - protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50; - - @Argument(shortName="PI", fullName="PrintInterestingSites", doc="If provided, interesting sites in the unselected, called set will be printed", required=false) - protected boolean PRINT_INTERESTING_SITES = false; - -// @Argument(shortName="PIA", fullName="PrintAllInterestingSites", doc="If provided, interesting sites will be printed for all sets. Verbose", required=false) -// protected boolean PRINT_ALL_INTERESTING_SITES = false; - - // -------------------------------------------------------------------------------------------------------------- - // - // private walker data - // - // -------------------------------------------------------------------------------------------------------------- - - /** private class holding all of the information about a single evaluation group (e.g., for eval ROD) */ - private class EvaluationContext extends HashMap> { - // useful for typing - public String trackName, contextName; - VariantContextUtils.JexlVCMatchExp selectExp; - - public EvaluationContext(String trackName, String contextName, VariantContextUtils.JexlVCMatchExp selectExp) { - this.trackName = trackName; - this.contextName = contextName; - this.selectExp = selectExp; - } - } - - private HashMap contexts = new HashMap(); - private Set compNames = new HashSet(); - - private static String RAW_SET_NAME = "raw"; - private static String RETAINED_SET_NAME = "called"; - private static String FILTERED_SET_NAME = "filtered"; - private static String ALL_SET_NAME = "all"; - private static String KNOWN_SET_NAME = "known"; - private static String NOVEL_SET_NAME = "novel"; - - // Dynamically determined variantEvaluation classes - private List variantEvaluationNames = new ArrayList(); - private List> evaluationClasses = null; - - // -------------------------------------------------------------------------------------------------------------- - // - // initialize - // - // -------------------------------------------------------------------------------------------------------------- - - public void initialize() { - determineAllEvalations(); - List selectExps = VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS); - - for ( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) { - if ( d.getName().startsWith("eval") ) { - for ( VariantContextUtils.JexlVCMatchExp e : selectExps ) { - addNewContext(d.getName(), d.getName() + "." + e.name, e); - } - addNewContext(d.getName(), d.getName() + ".all", null); - } else if ( d.getName().startsWith("dbsnp") || d.getName().startsWith("hapmap") || d.getName().startsWith("comp") ) { - compNames.add(d.getName()); - } else { - logger.info("Not evaluating ROD binding " + d.getName()); - } - } - - determineContextNamePartSizes(); - } - - private void determineAllEvalations() { - evaluationClasses = PackageUtils.getClassesImplementingInterface(VariantEvaluator.class); - for ( VariantEvaluator e : instantiateEvalationsSet(false, null) ) { - // for collecting purposes - variantEvaluationNames.add(e.getName()); - logger.debug("Including VariantEvaluator " + e.getName() + " of class " + e.getClass()); - } - - Collections.sort(variantEvaluationNames); - } - - private Set instantiateEvalationsSet(boolean baseline, String name) { - Set evals = new HashSet(); - Object[] args = new Object[]{this}; - Class[] argTypes = new Class[]{this.getClass()}; - - for ( Class c : evaluationClasses ) { - try { - Constructor constructor = c.getConstructor(argTypes); - VariantEvaluator eval = (VariantEvaluator)constructor.newInstance(args); - if ( baseline ) eval.printInterestingSites(name); - evals.add(eval); - } catch (InstantiationException e) { - throw new StingException(String.format("Cannot instantiate class '%s': must be concrete class", c.getSimpleName())); - } catch (NoSuchMethodException e) { - throw new StingException(String.format("Cannot find expected constructor for class '%s': must have constructor accepting a single VariantEval2Walker object", c.getSimpleName())); - } catch (IllegalAccessException e) { - throw new StingException(String.format("Cannot instantiate class '%s':", c.getSimpleName())); - } catch (InvocationTargetException e) { - throw new StingException(String.format("Cannot instantiate class '%s':", c.getSimpleName())); - } - } - - return evals; - } - - private void addNewContext(String trackName, String contextName, VariantContextUtils.JexlVCMatchExp selectExp) { - EvaluationContext group = new EvaluationContext(trackName, contextName, selectExp); - - for ( String filteredName : Arrays.asList(RAW_SET_NAME, RETAINED_SET_NAME, FILTERED_SET_NAME) ) { - for ( String subname : Arrays.asList(ALL_SET_NAME, KNOWN_SET_NAME, NOVEL_SET_NAME) ) { - String name = subname + "." + filteredName; - group.put(name, instantiateEvalationsSet(subname == ALL_SET_NAME && filteredName == RETAINED_SET_NAME, trackName + "." + (selectExp == null ? "all" : selectExp.name) + "." + name)); - } - } - - contexts.put(contextName, group); - } - - // -------------------------------------------------------------------------------------------------------------- - // - // map - // - // -------------------------------------------------------------------------------------------------------------- - - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - //System.out.printf("map at %s with %d skipped%n", context.getLocation(), context.getSkippedBases()); - - Map comps = getCompVariantContexts(tracker, context); - - // to enable walking over pairs where eval or comps have no elements - for ( EvaluationContext group : contexts.values() ) { - VariantContext vc = getVariantContext(group.trackName, tracker, context); - - //logger.debug(String.format("Updating %s of %s with variant", group.name, vc)); - for ( Map.Entry> namedEvaluations : group.entrySet() ) { - String evaluationName = namedEvaluations.getKey(); - Set evaluations = namedEvaluations.getValue(); - boolean evalWantsVC = applyVCtoEvaluation(evaluationName, vc, comps, group); - - for ( VariantEvaluator evaluation : evaluations ) { - // we always call update0 in case the evaluation tracks things like number of bases covered - evaluation.update0(tracker, ref, context); - - // now call the single or paired update function - switch ( evaluation.getComparisonOrder() ) { - case 1: - if ( evalWantsVC && vc != null ) - evaluation.update1(vc, tracker, ref, context); - break; - case 2: - for ( VariantContext comp : comps.values() ) { - evaluation.update2( evalWantsVC ? vc : null, comp, tracker, ref, context); - } - break; - default: - throw new StingException("BUG: Unexpected evaluation order " + evaluation); - } - } - } - } - - return 0; - } - - private boolean applyVCtoEvaluation(String evaluationName, VariantContext vc, Map comps, EvaluationContext group) { - if ( vc == null ) - return true; - - if ( evaluationName.contains(FILTERED_SET_NAME) && vc.isNotFiltered() ) - return false; - - if ( evaluationName.contains(RETAINED_SET_NAME) && vc.isFiltered() ) - return false; - - boolean vcKnown = vcIsKnown(vc, comps, KNOWN_NAMES); - if ( evaluationName.contains(KNOWN_SET_NAME) && ! vcKnown ) - return false; - else if ( evaluationName.contains(NOVEL_SET_NAME) && vcKnown ) - return false; - - if ( group.selectExp != null && ! VariantContextUtils.match(vc, group.selectExp) ) - return false; - - // nothing invalidated our membership in this set - return true; - } - - private Map getCompVariantContexts(RefMetaDataTracker tracker, AlignmentContext context) { - Map comps = new HashMap(); - - for ( String compName : compNames ) { - comps.put(compName, getVariantContext(compName, tracker, context)); - } - - // todo -- remove me when the loop works correctly for comparisons of eval x comp for each comp - if ( comps.size() > 1 ) throw new StingException("VariantEval2 currently only supports comparisons of N eval tracks vs. a single comparison track. Yes, I know..."); - return comps; - } - - private boolean vcIsKnown(VariantContext vc, Map comps, String[] knownNames ) { - for ( String knownName : knownNames ) { - VariantContext known = comps.get(knownName); - if ( known != null && known.isNotFiltered() && known.getType() == vc.getType() ) - return true; - } - - return false; - } - -// can't handle this situation -// todo -- warning, this leads to some missing SNPs at complex loci, such as: -// todo -- 591 1 841619 841620 rs4970464 0 - A A -/C/T genomic mixed unknown 0 0 near-gene-3 exact 1 -// todo -- 591 1 841619 841620 rs62677860 0 + A A C/T genomic single unknown 0 0 near-gene-3 exact 1 -// -//logger.info(String.format("Ignore second+ events at locus %s in rod %s => rec is %s", context.getLocation(), rodList.getName(), rec)); - - private VariantContext getVariantContext(String name, RefMetaDataTracker tracker, AlignmentContext context) { - if ( tracker != null ) { - RODRecordList rodList = tracker.getTrackData(name, null); - if ( rodList != null ) { - for ( ReferenceOrderedDatum rec : rodList.getRecords() ) { - if ( rec.getLocation().getStart() == context.getLocation().getStart() ) { - VariantContext vc = VariantContextAdaptors.toVariantContext(name, rec); - if ( vc != null ) { - return vc; - } - } - } - } - } - - return null; - } - - // -------------------------------------------------------------------------------------------------------------- - // - // reduce - // - // -------------------------------------------------------------------------------------------------------------- - public Integer reduceInit() { - return 0; - } - - public Integer reduce(Integer point, Integer sum) { - return point + sum; - } - - public VariantEvaluator getEvalByName(String name, Set s) { - for ( VariantEvaluator e : s ) - if ( e.getName().equals(name) ) - return e; - return null; - } - - private > List sorted(Collection c ) { - List l = new ArrayList(c); - Collections.sort(l); - return l; - } - - private final static String CONTEXT_HEADER = "track.subset.novelty.filter"; - private final static int N_CONTEXT_NAME_PARTS = CONTEXT_HEADER.split("\\.").length; - private static int[] nameSizes = new int[N_CONTEXT_NAME_PARTS]; - static { - int i = 0; - for ( String elt : CONTEXT_HEADER.split("\\.") ) - nameSizes[i++] = elt.length(); - } - - private void determineContextNamePartSizes() { - for ( String contextName : sorted(contexts.keySet()) ) { - EvaluationContext group = contexts.get(contextName); - for ( String evalSubgroupName : sorted(group.keySet()) ) { - String keyWord = contextName + "." + evalSubgroupName; - String[] parts = keyWord.split("\\."); - if ( parts.length != N_CONTEXT_NAME_PARTS ) { - throw new StingException("Unexpected number of eval name parts " + keyWord + " length = " + parts.length + ", expected " + N_CONTEXT_NAME_PARTS); - } else { - for ( int i = 0; i < parts.length; i++ ) - nameSizes[i] = Math.max(nameSizes[i], parts[i].length()); - } - } - } - } - - private String formatKeyword(String keyWord) { - //System.out.printf("keyword %s%n", keyWord); - - StringBuilder s = new StringBuilder(); - int i = 0; - for ( String part : keyWord.split("\\.") ) { - //System.out.printf("part %s %d%n", part, nameSizes[i]); - s.append(String.format("%"+nameSizes[i]+"s ", part)); - i++; - } - - return s.toString(); - } - - public void onTraversalDone(Integer result) { - // todo -- this really needs to be pretty printed; use some kind of table organization - // todo -- so that we can load up all of the data in one place, analyze the widths of the columns - // todo -- and pretty print it - for ( String evalName : variantEvaluationNames ) { - boolean first = true; - out.printf("%n%n"); - // todo -- show that comp is dbsnp, etc. is columns - for ( String contextName : sorted(contexts.keySet()) ) { - EvaluationContext group = contexts.get(contextName); - - out.printf("%s%n", Utils.dupString('-', 80)); - for ( String evalSubgroupName : sorted(group.keySet()) ) { - Set evalSet = group.get(evalSubgroupName); - VariantEvaluator eval = getEvalByName(evalName, evalSet); - String keyWord = contextName + "." + evalSubgroupName; - - if ( first ) { - out.printf("%20s %s %s%n", evalName, formatKeyword(CONTEXT_HEADER), Utils.join("\t", eval.getTableHeader())); - first = false; - } - - for ( List row : eval.getTableRows() ) - out.printf("%20s %s %s%n", evalName, formatKeyword(keyWord), Utils.join("\t", row)); - } - } - } - } - - protected Logger getLogger() { return logger; } -} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEvaluator.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEvaluator.java deleted file mode 100755 index 522d1a538..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEvaluator.java +++ /dev/null @@ -1,72 +0,0 @@ -package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2; - -import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; - -import java.util.List; -import java.util.ArrayList; - -/** - * Created by IntelliJ IDEA. - * User: depristo - * Date: Jan 29, 2010 - * Time: 3:38:02 PM - * To change this template use File | Settings | File Templates. - */ -public abstract class VariantEvaluator { - protected boolean accumulateInterestingSites = false, printInterestingSites = false; - protected String interestingSitePrefix = null; - protected boolean processedASite = false; - protected List interestingSites = new ArrayList(); - - public abstract String getName(); - - // do we want to keep track of things that are interesting - public void accumulateInterestingSites(boolean enable) { accumulateInterestingSites = enable; } - public void printInterestingSites(String prefix) { printInterestingSites = true; interestingSitePrefix = prefix; } - public boolean isAccumulatingInterestingSites() { return accumulateInterestingSites; } - public List getInterestingSites() { return interestingSites; } - protected void addInterestingSite(String why, VariantContext vc) { - if ( accumulateInterestingSites ) - interestingSites.add(vc); - if ( printInterestingSites ) - System.out.printf("%40s %s%n", interestingSitePrefix, why); - } - - public boolean processedAnySites() { return processedASite; } - protected void markSiteAsProcessed() { processedASite = true; } - - /** Should return the number of VariantContexts expected as inputs to update. Can be 1 or 2 */ - public abstract int getComparisonOrder(); - - /** called at all sites, regardless of eval context itself; useful for counting processed bases */ - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { } - public void update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { } - public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { } - - public void finalize() {} - - public abstract String toString(); - - // making it a table - public abstract List getTableHeader(); - public abstract List> getTableRows(); - - // - // useful common utility routines - // - protected double rate(long n, long d) { - return n / (1.0 * Math.max(d, 1)); - } - - protected long inverseRate(long n, long d) { - return n == 0 ? 0 : d / Math.max(n, 1); - } - - protected double ratio(long num, long denom) { - return ((double)num) / (Math.max(denom, 1)); - } - -}