Added: A VariantEval module that gives simple metrics by sample, and an abstract class that makes per-sample modules easy to write (but a little bit clunky, since a class needs to be defined for each data point -- see SimpleMetricsBySample as an example). AnalysisModuleScanner needed a slight update to pull in data points from parent classes for this to work (thanks Khalid for showing me how to do this). After a code review with Aaron (thanks) and ensuring integration tests pass, I am committing.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3939 348d0f76-0448-11de-a6fe-93d51630548a
2010-08-04 19:37:39 +00:00 · 2010-08-04 19:37:39 +00:00 · 38e65f6e1b
parent f13d52e427
commit 38e65f6e1b
3 changed files with 304 additions and 7 deletions
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/SimpleMetricsBySample.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/SimpleMetricsBySample.java
@ -0,0 +1,112 @@
+package org.broadinstitute.sting.gatk.walkers.varianteval;
+
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
+import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.varianteval.SampleDataPoint;
+import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvaluatorBySample;
+import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
+import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Extends the per-sample variant evaluator class and returns, for each sample, the number of SNPs, the Ti/Tv, and
+ * the comp overlap. A site only contributes to a sample when that sample's genotype is het or hom var and is
+ * not filtered (see includeGenotype).
+ */
+@Analysis(name = "Simple Metrics by Sample", description = "Variant counts, Ti/Tv, comp overlap; per sample")
+public class SimpleMetricsBySample extends VariantEvaluatorBySample {
+    public SimpleMetricsBySample(VariantEvalWalker parent) { super(parent); }
+
+    /**
+     * Builds a brand-new set of data points on every call. The parent class calls this once to learn the column
+     * names and once more for each sample it adds to the table, so the instances must never be shared between calls.
+     *
+     * @return fresh, zeroed data points in column order: SNP count, Ti/Tv ratio, comp overlap
+     */
+    @Override
+    public List<SampleDataPoint> getDataPoints() {
+        // parameterized (the original used a raw ArrayList, which compiles only with an unchecked warning)
+        List<SampleDataPoint> points = new ArrayList<SampleDataPoint>(3);
+        points.add(new CountSNPsSample());
+        points.add(new TiTvRatioSample());
+        points.add(new CompOverlapSample());
+
+        return points;
+    }
+
+    /** @return the name given to the per-sample table */
+    @Override
+    public String getTableName() {
+        return "SimpleMetricsBySample";
+    }
+
+    /** @return the name of this evaluation module */
+    public String getName() {
+        return "SimpleMetricsBySample";
+    }
+
+    public int getComparisonOrder() { return 2; }
+
+    /**
+     * Decides whether a sample's genotype at a site counts towards that sample's metrics.
+     *
+     * @param g the sample's genotype at the current site
+     * @return true when the genotype is het or hom var, and is not filtered
+     */
+    @Override
+    public boolean includeGenotype(Genotype g) {
+        return (g.isHet() || g.isHomVar()) && ! g.isFiltered();
+    }
+
+    /** @return always true; this module is unconditionally enabled */
+    public boolean enabled() {
+        return true;
+    }
+
+}
+
+/**
+ * Per-sample tally of eval sites that are SNPs. Note that the column is reported under the name
+ * "CountVariants" even though only SNPs are counted.
+ */
+class CountSNPsSample extends SampleDataPoint {
+    int numVariants = 0;
+
+    public CountSNPsSample() {
+        super("CountVariants");
+    }
+
+    /**
+     * Adds one to the tally when the eval context is present and is a SNP; the remaining arguments are not used.
+     */
+    public void update2(VariantContext vc, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        if ( vc == null || ! vc.isSNP() ) {
+            return; // nothing to count at this site
+        }
+        numVariants += 1;
+    }
+
+    /** @return the tally formatted as a decimal integer */
+    public String toString() {
+        final String rendered = String.format("%d", numVariants);
+        return rendered;
+    }
+}
+
+/**
+ * Per-sample transition/transversion ratio, computed over eval sites that are SNPs.
+ */
+class TiTvRatioSample extends SampleDataPoint {
+    int nTi = 0;
+    int nTv = 0;
+
+    public TiTvRatioSample() {
+        super("TiTvRatio");
+    }
+
+    /**
+     * Classifies the eval SNP (if any) as a transition or a transversion and bumps the matching counter;
+     * the remaining arguments are not used.
+     */
+    public void update2(VariantContext vc, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        if ( vc == null || ! vc.isSNP() ) {
+            return; // only SNPs contribute to Ti/Tv
+        }
+
+        if ( ! vc.isTransition() ) {
+            nTv += 1;
+        } else {
+            nTi += 1;
+        }
+    }
+
+    /**
+     * @return nTi / nTv with two decimal places. The division is done in floating point, so when nTv is zero
+     *         the formatted value is Infinity (or NaN if nTi is zero as well) rather than an exception.
+     */
+    public String toString() {
+        final double ratio = nTi / (double) nTv;
+        return String.format("%.2f", ratio);
+    }
+}
+
+/**
+ * Per-sample count of sites where an eval SNP coincides with an unfiltered comp SNP.
+ */
+class CompOverlapSample extends SampleDataPoint {
+    int nOverlap = 0;
+
+    public CompOverlapSample() {
+        super("CompOverlap");
+    }
+
+    /**
+     * Counts the site as an overlap when the comp context is present, not filtered and a SNP, and the eval
+     * context is present and a SNP; the remaining arguments are not used.
+     */
+    public void update2(VariantContext eval, VariantContext comp,RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        final boolean usableComp = comp != null && comp.isNotFiltered() && comp.isSNP();
+        final boolean usableEval = eval != null && eval.isSNP();
+        if ( ! ( usableComp && usableEval ) ) {
+            return; // at least one side is missing, filtered, or not a SNP
+        }
+        nOverlap += 1;
+    }
+
+    /** @return the overlap count formatted as a decimal integer */
+    public String toString() {
+        final String rendered = String.format("%d", nOverlap);
+        return rendered;
+    }
+}
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/VariantEvaluatorBySample.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/VariantEvaluatorBySample.java
@ -0,0 +1,183 @@
+package org.broadinstitute.sting.gatk.walkers.varianteval;
+
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
+import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvaluator;
+import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
+import org.broadinstitute.sting.playground.utils.report.utils.TableType;
+
+import java.util.*;
+
+/**
+ * An abstract way to break variant analyses down by sample. SampleDataPoint objects (e.g. its inheritors) are propagated
+ * into a per-sample table, which is updated only when a specific sample's genotype is such that the module-defined
+ * includeGenotype(G) returns true.
+ * @Author chartl
+ */
+public abstract class VariantEvaluatorBySample extends VariantEvaluator {
+    @DataPoint(name="VariantEvaluatorBySample",description="Evaluation broken down by sample")
+    EvalBySample evalBySample;
+
+    public VariantEvaluatorBySample(VariantEvalWalker parent) {
+        super(parent);
+        evalBySample = initializeTable();
+    }
+
+    public abstract String getTableName();
+
+    public abstract List<SampleDataPoint> getDataPoints();
+
+    public abstract boolean includeGenotype(Genotype g);
+
+    public EvalBySample initializeTable() {
+        if ( enabled() ) {
+            EvalBySample ebs = new EvalBySample(getTableName(),getDataPoints());
+            return ebs;
+        } else {
+            return null;
+        }
+    }
+
+    // note -- this only updates at all sites after the first site where a sample has been identified containing a variant genotype
+    public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        for ( Map.Entry<String,List<SampleDataPoint>> entry : evalBySample.sampleAndEvalResults.entrySet() ) {
+            for ( SampleDataPoint dp : entry.getValue() ) {
+                dp.update0(tracker,ref,context);
+            }
+        }
+    }
+
+    public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        for ( String sample : vc1.getSampleNames() ) {
+            if ( includeGenotype(vc1.getGenotype(sample)) ) {
+                if ( ! evalBySample.sampleAndEvalResults.containsKey(sample) ) {
+                    evalBySample.sampleAndEvalResults.put(sample,getDataPoints());
+                }
+
+                for ( SampleDataPoint dp : evalBySample.sampleAndEvalResults.get(sample) ) {
+                    dp.update1(vc1,tracker,ref,context);
+                }
+            }
+        }
+
+        return null; // don't return interesting sites
+    }
+
+    public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        if ( vc1 == null ) {
+            return null; // cannot update by sample if there are no samples
+        }
+        for ( String sample : vc1.getSampleNames() ) {
+            if ( includeGenotype(vc1.getGenotype(sample)) ) {
+                if ( ! evalBySample.sampleAndEvalResults.containsKey(sample) ) {
+                    evalBySample.sampleAndEvalResults.put(sample,getDataPoints());
+                }
+
+                for ( SampleDataPoint dp : evalBySample.sampleAndEvalResults.get(sample) ) {
+                    dp.update2(vc1,vc2,tracker,ref,context);
+                }
+            }
+        }
+
+        return null; // don't return interesting sites
+    }
+
+    @Override
+    public void finalizeEvaluation() {
+        evalBySample.finalizeTable();
+    }
+
+}
+
+/**
+ * One named cell of a per-sample table. Subclasses override whichever update hooks they need (all of them
+ * default to doing nothing) and must supply toString(), which is what the table prints for the cell.
+ */
+abstract class SampleDataPoint {
+    public String name;
+
+    public SampleDataPoint(String name) {
+        this.name = name;
+    }
+
+    /** @return the column name this data point is reported under */
+    public String getName() {
+        return this.name;
+    }
+
+    /** @return the text shown in the table for this data point */
+    public abstract String toString();
+
+    /** Site-level hook; does nothing unless overridden. */
+    public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        // intentionally empty
+    }
+
+    /** Single-context hook; does nothing unless overridden. */
+    public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        // intentionally empty
+    }
+
+    /** Eval-versus-comp hook; does nothing unless overridden. */
+    public void update2(VariantContext eval, VariantContext comp,RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        // intentionally empty
+    }
+
+    /** Called on each data point when the table is finalized; does nothing unless overridden. */
+    public void finalizeCalculation() {
+        // intentionally empty
+    }
+}
+
+/**
+ * A table with one row per sample (sorted by sample name) and one column per data point.
+ * Rows are added by writing to sampleAndEvalResults; finalizeTable() must be called before getCell().
+ */
+class EvalBySample implements TableType {
+    public String[] evalNames;
+    public TreeMap<String, List<SampleDataPoint>> sampleAndEvalResults;
+    public String name;
+    private HashMap<String,Integer> nameToDataPointOffset;
+
+    // [row = sample][column = data point]; null until finalizeTable() has run
+    private Object[][] finalizedResults;
+
+    /**
+     * @param name  the table name
+     * @param evals prototype data points; only their names are used, to fix the columns and their order
+     */
+    public EvalBySample(String name, Collection<SampleDataPoint> evals) {
+        int i = 0;
+        this.evalNames = new String[evals.size()];
+        this.nameToDataPointOffset = new HashMap<String,Integer>(evals.size());
+        for ( SampleDataPoint s : evals ) {
+            this.evalNames[i] = s.getName();
+            this.nameToDataPointOffset.put(s.getName(),i);
+            i++;
+        }
+
+        this.name = name;
+        this.sampleAndEvalResults = new TreeMap<String,List<SampleDataPoint>>();
+    }
+
+    /** @return the data point names, in column order */
+    public Object[] getColumnKeys() {
+        return evalNames;
+    }
+
+    /**
+     * @param x row index (position of the sample in sorted order)
+     * @param y column index (position of the data point)
+     * @return the string form of the data point in that cell; only valid after finalizeTable()
+     */
+    public String getCell(int x, int y) {
+        return finalizedResults[x][y].toString();
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    /** @return the sample names in sorted order, one per row */
+    public Object[] getRowKeys() {
+        return sampleAndEvalResults.keySet().toArray(new String[sampleAndEvalResults.size()]);
+    }
+
+    /**
+     * Finalizes every data point and snapshots them into the cell grid read by getCell().
+     */
+    public void finalizeTable() {
+        // rows are samples, columns are data points; the grid used to be allocated as samples x samples, which
+        // overflowed the row as soon as there were fewer samples than data points
+        finalizedResults = new Object[sampleAndEvalResults.size()][evalNames.length];
+        int row = 0;
+        for ( List<SampleDataPoint> points : sampleAndEvalResults.values() ) {
+            int col = 0;
+            for ( SampleDataPoint point : points ) {
+                point.finalizeCalculation();
+                finalizedResults[row][col] = point;
+                col++;
+            }
+            row++;
+        }
+    }
+
+    /** @return true if the sample already has a row in the table */
+    public boolean hasSample(String sample) {
+        return sampleAndEvalResults.containsKey(sample);
+    }
+
+}
--- a/java/src/org/broadinstitute/sting/playground/utils/report/AnalysisModuleScanner.java
+++ b/java/src/org/broadinstitute/sting/playground/utils/report/AnalysisModuleScanner.java
@ -87,13 +87,15 @@ public class AnalysisModuleScanner {
     */
    private void scanFields() {
        // get the fields from the class, and extract
-        for (Field f : cls.getDeclaredFields())
-            for (Annotation annotation : f.getAnnotations()) {
-                if (annotation.annotationType().equals(Param.class))
-                    parameters.put(f, (Param) annotation);
-                if (annotation.annotationType().equals(DataPoint.class))
-                    datums.put(f,(DataPoint) annotation);
-            }
+        for ( Class superCls = cls; superCls != null; superCls=superCls.getSuperclass() ) { // start at cls and climb the superclass chain until getSuperclass() returns null
+            for (Field f : superCls.getDeclaredFields()) // only the fields declared directly at this level of the hierarchy
+                for (Annotation annotation : f.getAnnotations()) {
+                    if (annotation.annotationType().equals(Param.class))
+                        parameters.put(f, (Param) annotation); // fields annotated with Param go into parameters
+                    if (annotation.annotationType().equals(DataPoint.class))
+                        datums.put(f,(DataPoint) annotation); // fields annotated with DataPoint go into datums
+                }
+        }
    }

    /**