Added: A VariantEval module that gives simple metrics by sample, and an abstract class that makes per-sample modules easy to write (but a little bit clunky, since a class needs to be defined for each data point -- see SimpleMetricsBySample as an example). AnalysisModuleScanner needed a slight update to pull in data points from parent classes for this to work (thanks Khalid for showing me how to do this). After a code review with Aaron (thanks) and ensuring integration tests pass, I am committing.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3939 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-08-04 19:37:39 +00:00
parent f13d52e427
commit 38e65f6e1b
3 changed files with 304 additions and 7 deletions

View File

@ -0,0 +1,112 @@
package org.broadinstitute.sting.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.SampleDataPoint;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvaluatorBySample;
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
import java.util.ArrayList;
import java.util.List;
/**
* Extends the Per-sample variant evaluator class and returns, for each sample, the number of variants, the Ti/Tv, and
* the comp overlap. It does this only on sites where the sample is identified as hom var, or het.
*/
@Analysis(name = "Simple Metrics by Sample", description = "Variant counts, Ti/Tv, comp overlap; per sample")
public class SimpleMetricsBySample extends VariantEvaluatorBySample {
public SimpleMetricsBySample(VariantEvalWalker parent) { super(parent); }
public List<SampleDataPoint> getDataPoints() {
List<SampleDataPoint> points = new ArrayList(3);
points.add(new CountSNPsSample());
points.add(new TiTvRatioSample());
points.add(new CompOverlapSample());
return points;
}
public String getTableName() {
return "SimpleMetricsBySample";
}
public String getName() {
return "SimpleMetricsBySample";
}
public int getComparisonOrder() { return 2; }
public boolean includeGenotype(Genotype g) {
return (g.isHet() || g.isHomVar()) && ! g.isFiltered();
}
public boolean enabled() {
return true;
}
}
/**
 * Data point that counts the SNP sites seen for one sample. Reported under the column name "CountVariants".
 */
class CountSNPsSample extends SampleDataPoint {
    /** Number of non-null SNP eval contexts passed to update2 so far. */
    int numVariants = 0;

    public CountSNPsSample() {
        super("CountVariants");
    }

    /**
     * Increments the counter when the eval context is present and is a SNP; the remaining arguments are unused.
     */
    public void update2(VariantContext vc, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( vc == null || ! vc.isSNP() ) {
            return;
        }
        numVariants += 1;
    }

    /** @return the current count formatted as a decimal integer */
    public String toString() {
        return String.format("%d", numVariants);
    }
}
/**
 * Data point that tallies transitions and transversions for one sample and reports their ratio.
 * Reported under the column name "TiTvRatio".
 */
class TiTvRatioSample extends SampleDataPoint {
    /** Number of SNP sites classified as transitions. */
    int nTi = 0;
    /** Number of SNP sites not classified as transitions. */
    int nTv = 0;

    public TiTvRatioSample() {
        super("TiTvRatio");
    }

    /**
     * Classifies a non-null SNP eval context as a transition or not and bumps the matching tally;
     * anything else is ignored. The remaining arguments are unused.
     */
    public void update2(VariantContext vc, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        final boolean isSnpSite = vc != null && vc.isSNP();
        if ( ! isSnpSite ) {
            return;
        }
        if ( vc.isTransition() ) {
            nTi += 1;
        } else {
            nTv += 1;
        }
    }

    /**
     * @return nTi / nTv with two decimals. The division is done in floating point, so a zero nTv does not
     *         throw: the result is formatted as "Infinity" (nTi &gt; 0) or "NaN" (nTi == 0).
     */
    public String toString() {
        final double ratio = ((double) nTi) / nTv;
        return String.format("%.2f", ratio);
    }
}
/**
 * Data point that counts, for one sample, the sites where both the eval and the comp track carry a SNP.
 * Reported under the column name "CompOverlap".
 */
class CompOverlapSample extends SampleDataPoint {
    /** Number of sites where eval is a SNP and comp is an unfiltered SNP. */
    int nOverlap = 0;

    public CompOverlapSample() {
        super("CompOverlap");
    }

    /**
     * Counts the site when comp is present, not filtered and a SNP, and eval is present and a SNP.
     * The tracker, ref and context arguments are unused.
     */
    public void update2(VariantContext eval, VariantContext comp,RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        final boolean usableComp = comp != null && comp.isNotFiltered() && comp.isSNP();
        final boolean usableEval = eval != null && eval.isSNP();
        if ( ! ( usableComp && usableEval ) ) {
            return;
        }
        nOverlap += 1;
    }

    /** @return the overlap count formatted as a decimal integer */
    public String toString() {
        return String.format("%d", nOverlap);
    }
}

View File

@ -0,0 +1,183 @@
package org.broadinstitute.sting.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvaluator;
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
import org.broadinstitute.sting.playground.utils.report.utils.TableType;
import java.util.*;
/**
 * An abstract way to break variant analyses down by sample. SampleDataPoint objects (i.e. its inheritors) are
 * propagated into a per-sample table, which is updated only when a specific sample's genotype is such that the
 * module-defined includeGenotype(Genotype) returns true.
 *
 * @author chartl
 */
public abstract class VariantEvaluatorBySample extends VariantEvaluator {
    /** The per-sample table; null when the module reported itself as disabled at construction time. */
    @DataPoint(name="VariantEvaluatorBySample",description="Evaluation broken down by sample")
    EvalBySample evalBySample;

    /**
     * Builds the evaluator and its table. Note that initializeTable() calls the overridable methods
     * enabled(), getTableName() and getDataPoints() before any subclass constructor body has run, so
     * implementations of those methods must not depend on subclass instance state.
     */
    public VariantEvaluatorBySample(VariantEvalWalker parent) {
        super(parent);
        evalBySample = initializeTable();
    }

    /** @return the name given to the per-sample table */
    public abstract String getTableName();

    /** @return a fresh list of data points; it is called once per sample, so instances must not be shared */
    public abstract List<SampleDataPoint> getDataPoints();

    /**
     * @param g one sample's genotype at the current site
     * @return true if the site should be counted towards that sample
     */
    public abstract boolean includeGenotype(Genotype g);

    /**
     * @return a new, empty per-sample table when the module is enabled, otherwise null
     */
    public EvalBySample initializeTable() {
        if ( ! enabled() ) {
            return null;
        }
        return new EvalBySample(getTableName(), getDataPoints());
    }

    /**
     * Forwards the update0 callback to every data point of every sample already in the table.
     * Note: a sample only enters the table at the first site where its genotype passes includeGenotype(),
     * so earlier sites are never seen by that sample's data points.
     */
    public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( evalBySample == null ) {
            return; // module disabled: there is no table to update
        }
        for ( List<SampleDataPoint> points : evalBySample.sampleAndEvalResults.values() ) {
            for ( SampleDataPoint dp : points ) {
                dp.update0(tracker,ref,context);
            }
        }
    }

    /**
     * Forwards the update1 callback to the data points of every sample whose genotype passes includeGenotype().
     *
     * @return always null (no interesting sites are reported)
     */
    public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( vc1 == null || evalBySample == null ) {
            return null; // cannot update by sample if there are no samples (or no table)
        }
        for ( String sample : vc1.getSampleNames() ) {
            if ( includeGenotype(vc1.getGenotype(sample)) ) {
                for ( SampleDataPoint dp : dataPointsFor(sample) ) {
                    dp.update1(vc1,tracker,ref,context);
                }
            }
        }
        return null; // don't return interesting sites
    }

    /**
     * Forwards the update2 callback to the data points of every sample in vc1 whose genotype passes
     * includeGenotype(). The samples are taken from vc1 only; vc2 is passed through untouched.
     *
     * @return always null (no interesting sites are reported)
     */
    public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( vc1 == null || evalBySample == null ) {
            return null; // cannot update by sample if there are no samples (or no table)
        }
        for ( String sample : vc1.getSampleNames() ) {
            if ( includeGenotype(vc1.getGenotype(sample)) ) {
                for ( SampleDataPoint dp : dataPointsFor(sample) ) {
                    dp.update2(vc1,vc2,tracker,ref,context);
                }
            }
        }
        return null; // don't return interesting sites
    }

    /** Freezes the per-sample table so that its cells can be read; does nothing when the module is disabled. */
    @Override
    public void finalizeEvaluation() {
        if ( evalBySample != null ) {
            evalBySample.finalizeTable();
        }
    }

    /** Returns the data points registered for the sample, creating and registering a fresh set on first use. */
    private List<SampleDataPoint> dataPointsFor(String sample) {
        List<SampleDataPoint> points = evalBySample.sampleAndEvalResults.get(sample);
        if ( points == null ) {
            points = getDataPoints();
            evalBySample.sampleAndEvalResults.put(sample, points);
        }
        return points;
    }
}
/**
 * Base class for one named statistic tracked for a single sample. Subclasses override whichever of the
 * update callbacks they need (all default to no-ops) and render their value through toString().
 */
abstract class SampleDataPoint {
    /** Name of this data point, as handed to the constructor. */
    public String name;

    public SampleDataPoint(String name) {
        this.name = name;
    }

    /** @return the name this data point was created with */
    public String getName() {
        return this.name;
    }

    /** @return the textual value of this data point */
    public abstract String toString();

    /** Callback taking no variant context; does nothing by default. */
    public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    }

    /** Callback taking a single variant context; does nothing by default. */
    public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    }

    /** Callback taking an eval and a comp variant context; does nothing by default. */
    public void update2(VariantContext eval, VariantContext comp,RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    }

    /** Hook invoked before the value is read from the finalized table; does nothing by default. */
    public void finalizeCalculation() {
    }
}
/**
 * Table of per-sample results: one row per sample (sorted by sample name), one column per data point.
 * Rows are accumulated in sampleAndEvalResults and frozen into a cell matrix by finalizeTable().
 */
class EvalBySample implements TableType {
    /** Column headers: the names of the data points, in the order they were supplied. */
    public String[] evalNames;
    /** Sample name to that sample's data points; a TreeMap, so iteration is in sorted sample-name order. */
    public TreeMap<String, List<SampleDataPoint>> sampleAndEvalResults;
    /** Name of this table. */
    public String name;
    private HashMap<String,Integer> nameToDataPointOffset;
    /** Cell matrix [sample row][data point column]; null until finalizeTable() has been called. */
    private Object[][] finalizedResults;

    /**
     * @param name  the table name
     * @param evals template data points; only their names (and order) are used, to define the columns
     */
    public EvalBySample(String name, Collection<SampleDataPoint> evals) {
        int i = 0;
        this.evalNames = new String[evals.size()];
        this.nameToDataPointOffset = new HashMap<String,Integer>(evals.size());
        for ( SampleDataPoint s : evals ) {
            this.evalNames[i] = s.getName();
            this.nameToDataPointOffset.put(s.getName(),i);
            i++;
        }
        this.name = name;
        this.sampleAndEvalResults = new TreeMap<String,List<SampleDataPoint>>();
    }

    /** @return the data point names, one per column */
    public Object[] getColumnKeys() {
        return evalNames;
    }

    /**
     * @param x row index (sample)
     * @param y column index (data point)
     * @return the string value of the cell; only valid after finalizeTable()
     */
    public String getCell(int x, int y) {
        return finalizedResults[x][y].toString();
    }

    /** @return the table name */
    public String getName() {
        return name;
    }

    /** @return the sample names, one per row, in sorted order */
    public Object[] getRowKeys() {
        return sampleAndEvalResults.keySet().toArray(new String[sampleAndEvalResults.size()]);
    }

    /**
     * Finalizes every data point and copies it into the cell matrix. Rows follow the sorted sample order
     * of sampleAndEvalResults; columns follow each sample's data point list order.
     */
    public void finalizeTable() {
        // rows = samples, columns = data points (NOT samples x samples: the column index runs over data points)
        finalizedResults = new Object[sampleAndEvalResults.size()][evalNames.length];
        int row = 0;
        for ( List<SampleDataPoint> points : sampleAndEvalResults.values() ) {
            int col = 0;
            for ( SampleDataPoint point : points ) {
                point.finalizeCalculation();
                finalizedResults[row][col] = point;
                col++;
            }
            row++;
        }
    }

    /** @return true if the sample already has a row in this table */
    public boolean hasSample(String sample) {
        return sampleAndEvalResults.containsKey(sample);
    }
}

View File

@ -87,13 +87,15 @@ public class AnalysisModuleScanner {
*/
private void scanFields() {
    // Walk the class itself and then every ancestor, so that annotated fields declared in parent
    // classes are picked up as well. The walk starts at cls, so no separate pass over cls is needed.
    for ( Class<?> superCls = cls; superCls != null; superCls = superCls.getSuperclass() ) {
        for ( Field f : superCls.getDeclaredFields() ) {
            for ( Annotation annotation : f.getAnnotations() ) {
                if ( annotation.annotationType().equals(Param.class) )
                    parameters.put(f, (Param) annotation);
                if ( annotation.annotationType().equals(DataPoint.class) )
                    datums.put(f, (DataPoint) annotation);
            }
        }
    }
}
/**