diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/SimpleMetricsBySample.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/SimpleMetricsBySample.java
new file mode 100755
index 000000000..8d070568f
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/SimpleMetricsBySample.java
@@ -0,0 +1,124 @@
+package org.broadinstitute.sting.gatk.walkers.varianteval;
+
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
+import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.varianteval.SampleDataPoint;
+import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvaluatorBySample;
+import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
+import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Extends the Per-sample variant evaluator class and returns, for each sample, the number of variants, the Ti/Tv, and
+ * the comp overlap. It does this only on sites where the sample is identified as hom var, or het.
+ */
+@Analysis(name = "Simple Metrics by Sample", description = "Variant counts, Ti/Tv, comp overlap; per sample")
+public class SimpleMetricsBySample extends VariantEvaluatorBySample {
+    public SimpleMetricsBySample(VariantEvalWalker parent) { super(parent); }
+
+    /** Builds a fresh set of counters on every call, so each sample row owns independent data points. */
+    public List<SampleDataPoint> getDataPoints() {
+        List<SampleDataPoint> points = new ArrayList<SampleDataPoint>(3);
+        points.add(new CountSNPsSample());
+        points.add(new TiTvRatioSample());
+        points.add(new CompOverlapSample());
+
+        return points;
+    }
+
+    public String getTableName() {
+        return "SimpleMetricsBySample";
+    }
+
+    public String getName() {
+        return "SimpleMetricsBySample";
+    }
+
+    public int getComparisonOrder() { return 2; }
+
+    /** Only unfiltered het or hom-var genotypes contribute to a sample's metrics. */
+    public boolean includeGenotype(Genotype g) {
+        return (g.isHet() || g.isHomVar()) && ! g.isFiltered();
+    }
+
+    public boolean enabled() {
+        return true;
+    }
+
+}
+
+/** Counts the SNP sites passed to update2 for one sample. */
+class CountSNPsSample extends SampleDataPoint {
+    int numVariants = 0;
+
+    public CountSNPsSample() {
+        super("CountVariants");
+    }
+
+    @Override
+    public void update2(VariantContext vc, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        if ( vc != null && vc.isSNP() ) {
+            numVariants++;
+        }
+    }
+
+    @Override
+    public String toString() {
+        return String.format("%d",numVariants);
+    }
+}
+
+/** Tallies transitions and transversions over SNP sites and reports their ratio. */
+class TiTvRatioSample extends SampleDataPoint {
+    int nTi = 0;
+    int nTv = 0;
+
+    public TiTvRatioSample() {
+        super("TiTvRatio");
+    }
+
+    @Override
+    public void update2(VariantContext vc, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        if ( vc != null && vc.isSNP() ) {
+            if ( vc.isTransition() ) {
+                nTi++;
+            } else {
+                nTv++;
+            }
+        }
+    }
+
+    @Override
+    public String toString() {
+        // floating-point division: with no transversions this prints "Infinity" (or "NaN" when nTi is also 0)
+        return String.format("%.2f", ( ((double) nTi )/ nTv));
+    }
+}
+
+/** Counts sites where the eval record is a SNP and the comp record is an unfiltered SNP. */
+class CompOverlapSample extends SampleDataPoint {
+    int nOverlap = 0;
+
+    public CompOverlapSample() {
+        super("CompOverlap");
+    }
+
+    @Override
+    public void update2(VariantContext eval, VariantContext comp,RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        boolean compIsGood = comp != null && comp.isNotFiltered() && comp.isSNP() ;
+        boolean evalIsGood = eval != null && eval.isSNP();
+        if ( compIsGood && evalIsGood ) {
+            nOverlap++;
+        }
+    }
+
+    @Override
+    public String toString() {
+        return String.format("%d",nOverlap);
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/VariantEvaluatorBySample.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/VariantEvaluatorBySample.java
new file mode 100755
index 000000000..0edf08dd9
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/VariantEvaluatorBySample.java
@@ -0,0 +1,192 @@
+package org.broadinstitute.sting.gatk.walkers.varianteval;
+
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
+import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvaluator;
+import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
+import org.broadinstitute.sting.playground.utils.report.utils.TableType;
+
+import java.util.*;
+
+/**
+ * An abstract way to break variant analyses down by sample. SampleDataPoint objects (e.g. its inheritors) are propagated
+ * into a per-sample table, which is updated only when a specific sample's genotype is such that the module-defined
+ * includeGenotype(G) returns true.
+ * @author chartl
+ */
+public abstract class VariantEvaluatorBySample extends VariantEvaluator {
+    @DataPoint(name="VariantEvaluatorBySample",description="Evaluation broken down by sample")
+    EvalBySample evalBySample;
+
+    public VariantEvaluatorBySample(VariantEvalWalker parent) {
+        super(parent);
+        // NOTE: this calls overridable methods, which run before the subclass's own fields are initialized
+        evalBySample = initializeTable();
+    }
+
+    public abstract String getTableName();
+
+    /** @return a new list of data points; it is stored as the row for each newly seen sample */
+    public abstract List<SampleDataPoint> getDataPoints();
+
+    public abstract boolean includeGenotype(Genotype g);
+
+    /** @return the per-sample table, or null when this evaluator is not enabled */
+    public EvalBySample initializeTable() {
+        if ( enabled() ) {
+            EvalBySample ebs = new EvalBySample(getTableName(),getDataPoints());
+            return ebs;
+        } else {
+            return null;
+        }
+    }
+
+    // note -- this only updates at all sites after the first site where a sample has been identified containing a variant genotype
+    public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        if ( evalBySample == null ) {
+            return; // evaluator is disabled, so there is no table to update
+        }
+        for ( List<SampleDataPoint> dataPoints : evalBySample.sampleAndEvalResults.values() ) {
+            for ( SampleDataPoint dp : dataPoints ) {
+                dp.update0(tracker,ref,context);
+            }
+        }
+    }
+
+    public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        if ( vc1 == null || evalBySample == null ) {
+            return null; // cannot update by sample if there are no samples (or no table)
+        }
+        for ( String sample : vc1.getSampleNames() ) {
+            if ( includeGenotype(vc1.getGenotype(sample)) ) {
+                for ( SampleDataPoint dp : dataPointsFor(sample) ) {
+                    dp.update1(vc1,tracker,ref,context);
+                }
+            }
+        }
+
+        return null; // don't return interesting sites
+    }
+
+    public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        if ( vc1 == null || evalBySample == null ) {
+            return null; // cannot update by sample if there are no samples (or no table)
+        }
+        for ( String sample : vc1.getSampleNames() ) {
+            if ( includeGenotype(vc1.getGenotype(sample)) ) {
+                for ( SampleDataPoint dp : dataPointsFor(sample) ) {
+                    dp.update2(vc1,vc2,tracker,ref,context);
+                }
+            }
+        }
+
+        return null; // don't return interesting sites
+    }
+
+    @Override
+    public void finalizeEvaluation() {
+        if ( evalBySample != null ) {
+            evalBySample.finalizeTable();
+        }
+    }
+
+    /** Returns the data points for a sample, creating its table row the first time the sample is seen. */
+    private List<SampleDataPoint> dataPointsFor(String sample) {
+        List<SampleDataPoint> dataPoints = evalBySample.sampleAndEvalResults.get(sample);
+        if ( dataPoints == null ) {
+            dataPoints = getDataPoints();
+            evalBySample.sampleAndEvalResults.put(sample,dataPoints);
+        }
+        return dataPoints;
+    }
+
+}
+
+/** A single named per-sample metric; the update hooks are no-ops unless a subclass overrides them. */
+abstract class SampleDataPoint {
+    public String name;
+
+    public SampleDataPoint(String name) {
+        this.name = name;
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    /** @return the text EvalBySample.getCell reports for this metric */
+    public abstract String toString();
+
+    public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {}
+
+    public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {}
+
+    public void update2(VariantContext eval, VariantContext comp,RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {}
+
+    public void finalizeCalculation() {}
+}
+
+/** Table with one row per sample (sorted by sample name) and one column per data point. */
+class EvalBySample implements TableType {
+    public String[] evalNames;
+    public TreeMap<String,List<SampleDataPoint>> sampleAndEvalResults;
+    public String name;
+    private HashMap<String,Integer> nameToDataPointOffset;
+
+    private Object[][] finalizedResults;
+
+    public EvalBySample(String name, Collection<SampleDataPoint> evals) {
+        int i = 0;
+        this.evalNames = new String[evals.size()];
+        this.nameToDataPointOffset = new HashMap<String,Integer>(evals.size());
+        for ( SampleDataPoint s : evals ) {
+            this.evalNames[i] = s.getName();
+            this.nameToDataPointOffset.put(s.getName(),i);
+            i++;
+        }
+
+        this.name = name;
+        this.sampleAndEvalResults = new TreeMap<String,List<SampleDataPoint>>();
+    }
+
+    public Object[] getColumnKeys() {
+        return evalNames;
+    }
+
+    /** Only valid after finalizeTable(); x is the sample row, y the data point column. */
+    public String getCell(int x, int y) {
+        return finalizedResults[x][y].toString();
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public Object[] getRowKeys() {
+        return sampleAndEvalResults.keySet().toArray(new String[sampleAndEvalResults.size()]);
+    }
+
+    /** Finalizes every data point and stores it in a rows-by-columns grid for reporting. */
+    public void finalizeTable() {
+        // columns are sized by data point count, not sample count; otherwise fewer samples than data points overflows
+        finalizedResults = new Object[sampleAndEvalResults.size()][evalNames.length];
+        int i = 0;
+        for ( List<SampleDataPoint> row : sampleAndEvalResults.values() ) {
+            int j = 0;
+            for ( SampleDataPoint o : row ) {
+                o.finalizeCalculation();
+                finalizedResults[i][j] = o;
+                j++;
+            }
+            i++;
+        }
+    }
+
+    public boolean hasSample(String sample) {
+        return sampleAndEvalResults.containsKey(sample);
+    }
+
+}
diff --git a/java/src/org/broadinstitute/sting/playground/utils/report/AnalysisModuleScanner.java b/java/src/org/broadinstitute/sting/playground/utils/report/AnalysisModuleScanner.java
index 4a0dde3cc..2d6fced05 100644
--- a/java/src/org/broadinstitute/sting/playground/utils/report/AnalysisModuleScanner.java
+++ b/java/src/org/broadinstitute/sting/playground/utils/report/AnalysisModuleScanner.java
@@ -87,13 +87,16 @@ public class AnalysisModuleScanner {
      */
     private void scanFields() {
         // get the fields from the class, and extract
-        for (Field f : cls.getDeclaredFields())
-            for (Annotation annotation : f.getAnnotations()) {
-                if (annotation.annotationType().equals(Param.class))
-                    parameters.put(f, (Param) annotation);
-                if (annotation.annotationType().equals(DataPoint.class))
-                    datums.put(f,(DataPoint) annotation);
-            }
+        // walk up the hierarchy: getDeclaredFields() excludes inherited fields, so annotated fields on base classes would be missed
+        for ( Class<?> superCls = cls; superCls != null; superCls=superCls.getSuperclass() ) {
+            for (Field f : superCls.getDeclaredFields())
+                for (Annotation annotation : f.getAnnotations()) {
+                    if (annotation.annotationType().equals(Param.class))
+                        parameters.put(f, (Param) annotation);
+                    if (annotation.annotationType().equals(DataPoint.class))
+                        datums.put(f,(DataPoint) annotation);
+                }
+        }
     }
 
     /**