Added: A VariantEval module that gives simple metrics by sample, and an abstract class that makes per-sample modules easy to write (though a little clunky, since a class needs to be defined for each data point -- see SimpleMetricsBySample as an example). AnalysisModuleScanner needed a slight update to pull in data points from parent classes for this to work (thanks Khalid for showing me how to do this). After a code review with Aaron (thanks) and ensuring integration tests pass, I am committing.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3939 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f13d52e427
commit
38e65f6e1b
|
|
@ -0,0 +1,112 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.SampleDataPoint;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvaluatorBySample;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Extends the Per-sample variant evaluator class and returns, for each sample, the number of variants, the Ti/Tv, and
|
||||
* the comp overlap. It does this only on sites where the sample is identified as hom var, or het.
|
||||
*/
|
||||
@Analysis(name = "Simple Metrics by Sample", description = "Variant counts, Ti/Tv, comp overlap; per sample")
|
||||
public class SimpleMetricsBySample extends VariantEvaluatorBySample {
|
||||
public SimpleMetricsBySample(VariantEvalWalker parent) { super(parent); }
|
||||
|
||||
public List<SampleDataPoint> getDataPoints() {
|
||||
List<SampleDataPoint> points = new ArrayList(3);
|
||||
points.add(new CountSNPsSample());
|
||||
points.add(new TiTvRatioSample());
|
||||
points.add(new CompOverlapSample());
|
||||
|
||||
return points;
|
||||
}
|
||||
|
||||
public String getTableName() {
|
||||
return "SimpleMetricsBySample";
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return "SimpleMetricsBySample";
|
||||
}
|
||||
|
||||
public int getComparisonOrder() { return 2; }
|
||||
|
||||
public boolean includeGenotype(Genotype g) {
|
||||
return (g.isHet() || g.isHomVar()) && ! g.isFiltered();
|
||||
}
|
||||
|
||||
public boolean enabled() {
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class CountSNPsSample extends SampleDataPoint {
|
||||
int numVariants = 0;
|
||||
|
||||
public CountSNPsSample() {
|
||||
super("CountVariants");
|
||||
}
|
||||
|
||||
public void update2(VariantContext vc, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( vc != null && vc.isSNP() ) {
|
||||
numVariants++;
|
||||
}
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%d",numVariants);
|
||||
}
|
||||
}
|
||||
|
||||
class TiTvRatioSample extends SampleDataPoint {
|
||||
int nTi = 0;
|
||||
int nTv = 0;
|
||||
|
||||
public TiTvRatioSample() {
|
||||
super("TiTvRatio");
|
||||
}
|
||||
|
||||
public void update2(VariantContext vc, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( vc != null && vc.isSNP() ) {
|
||||
if ( vc.isTransition() ) {
|
||||
nTi++;
|
||||
} else {
|
||||
nTv++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%.2f", ( ((double) nTi )/ nTv));
|
||||
}
|
||||
}
|
||||
|
||||
class CompOverlapSample extends SampleDataPoint {
|
||||
int nOverlap = 0;
|
||||
|
||||
public CompOverlapSample() {
|
||||
super("CompOverlap");
|
||||
}
|
||||
|
||||
public void update2(VariantContext eval, VariantContext comp,RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
boolean compIsGood = comp != null && comp.isNotFiltered() && comp.isSNP() ;
|
||||
boolean evalIsGood = eval != null && eval.isSNP();
|
||||
if ( compIsGood && evalIsGood ) {
|
||||
nOverlap++;
|
||||
}
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%d",nOverlap);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,183 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvaluator;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
|
||||
import org.broadinstitute.sting.playground.utils.report.utils.TableType;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* An abstract way to break variant analyses down by sample. SampleDataPoint objects (e.g. its inheritors) are propagated
|
||||
* into a per-sample table, which is updated only when a specific sample's genotype is such that the module-defined
|
||||
* includeGenotype(G) returns true.
|
||||
* @Author chartl
|
||||
*/
|
||||
public abstract class VariantEvaluatorBySample extends VariantEvaluator {
|
||||
@DataPoint(name="VariantEvaluatorBySample",description="Evaluation broken down by sample")
|
||||
EvalBySample evalBySample;
|
||||
|
||||
public VariantEvaluatorBySample(VariantEvalWalker parent) {
|
||||
super(parent);
|
||||
evalBySample = initializeTable();
|
||||
}
|
||||
|
||||
public abstract String getTableName();
|
||||
|
||||
public abstract List<SampleDataPoint> getDataPoints();
|
||||
|
||||
public abstract boolean includeGenotype(Genotype g);
|
||||
|
||||
public EvalBySample initializeTable() {
|
||||
if ( enabled() ) {
|
||||
EvalBySample ebs = new EvalBySample(getTableName(),getDataPoints());
|
||||
return ebs;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// note -- this only updates at all sites after the first site where a sample has been identified containing a variant genotype
|
||||
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
for ( Map.Entry<String,List<SampleDataPoint>> entry : evalBySample.sampleAndEvalResults.entrySet() ) {
|
||||
for ( SampleDataPoint dp : entry.getValue() ) {
|
||||
dp.update0(tracker,ref,context);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
for ( String sample : vc1.getSampleNames() ) {
|
||||
if ( includeGenotype(vc1.getGenotype(sample)) ) {
|
||||
if ( ! evalBySample.sampleAndEvalResults.containsKey(sample) ) {
|
||||
evalBySample.sampleAndEvalResults.put(sample,getDataPoints());
|
||||
}
|
||||
|
||||
for ( SampleDataPoint dp : evalBySample.sampleAndEvalResults.get(sample) ) {
|
||||
dp.update1(vc1,tracker,ref,context);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null; // don't return interesting sites
|
||||
}
|
||||
|
||||
public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( vc1 == null ) {
|
||||
return null; // cannot update by sample if there are no samples
|
||||
}
|
||||
for ( String sample : vc1.getSampleNames() ) {
|
||||
if ( includeGenotype(vc1.getGenotype(sample)) ) {
|
||||
if ( ! evalBySample.sampleAndEvalResults.containsKey(sample) ) {
|
||||
evalBySample.sampleAndEvalResults.put(sample,getDataPoints());
|
||||
}
|
||||
|
||||
for ( SampleDataPoint dp : evalBySample.sampleAndEvalResults.get(sample) ) {
|
||||
dp.update2(vc1,vc2,tracker,ref,context);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null; // don't return interesting sites
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finalizeEvaluation() {
|
||||
evalBySample.finalizeTable();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
abstract class SampleDataPoint {
|
||||
public String name;
|
||||
|
||||
public SampleDataPoint(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public abstract String toString();
|
||||
|
||||
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {}
|
||||
|
||||
public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {}
|
||||
|
||||
public void update2(VariantContext eval, VariantContext comp,RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {}
|
||||
|
||||
public void finalizeCalculation() {}
|
||||
}
|
||||
|
||||
class EvalBySample implements TableType {
|
||||
public String[] evalNames;
|
||||
public TreeMap<String, List<SampleDataPoint>> sampleAndEvalResults;
|
||||
public String name;
|
||||
private HashMap<String,Integer> nameToDataPointOffset;
|
||||
|
||||
private Object[][] finalizedResults;
|
||||
|
||||
public EvalBySample(String name, Collection<SampleDataPoint> evals) {
|
||||
int i = 0;
|
||||
this.evalNames = new String[evals.size()];
|
||||
this.nameToDataPointOffset = new HashMap<String,Integer>(evals.size());
|
||||
for ( SampleDataPoint s : evals ) {
|
||||
this.evalNames[i] = s.getName();
|
||||
this.nameToDataPointOffset.put(s.getName(),i);
|
||||
i++;
|
||||
}
|
||||
|
||||
this.name = name;
|
||||
this.sampleAndEvalResults = new TreeMap<String,List<SampleDataPoint>>();
|
||||
}
|
||||
|
||||
public Object[] getColumnKeys() {
|
||||
//System.out.printf("%s%n","Call to column keys");
|
||||
return evalNames;
|
||||
}
|
||||
|
||||
public String getCell(int x, int y) {
|
||||
return finalizedResults[x][y].toString();
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public Object[] getRowKeys() {
|
||||
String[] rowNames = new String[sampleAndEvalResults.size()];
|
||||
int i = 0;
|
||||
for ( Map.Entry<String,List<SampleDataPoint>> e : sampleAndEvalResults.entrySet() ) {
|
||||
rowNames[i] = e.getKey();
|
||||
i++;
|
||||
}
|
||||
|
||||
//System.out.printf("%s%n","Call to row keys");
|
||||
|
||||
return rowNames;
|
||||
}
|
||||
|
||||
public void finalizeTable() {
|
||||
finalizedResults = new Object[sampleAndEvalResults.size()][sampleAndEvalResults.size()];
|
||||
int i = 0;
|
||||
for ( Map.Entry<String,List<SampleDataPoint>> evalBySample : sampleAndEvalResults.entrySet() ) {
|
||||
int j = 0;
|
||||
for ( SampleDataPoint o : evalBySample.getValue() ) {
|
||||
o.finalizeCalculation();
|
||||
finalizedResults[i][j] = o;
|
||||
j++;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
public boolean hasSample(String sample) {
|
||||
return sampleAndEvalResults.containsKey(sample);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -87,13 +87,15 @@ public class AnalysisModuleScanner {
|
|||
*/
|
||||
private void scanFields() {
|
||||
// get the fields from the class, and extract
|
||||
for (Field f : cls.getDeclaredFields())
|
||||
for (Annotation annotation : f.getAnnotations()) {
|
||||
if (annotation.annotationType().equals(Param.class))
|
||||
parameters.put(f, (Param) annotation);
|
||||
if (annotation.annotationType().equals(DataPoint.class))
|
||||
datums.put(f,(DataPoint) annotation);
|
||||
}
|
||||
for ( Class superCls = cls; superCls != null; superCls=superCls.getSuperclass() ) {
|
||||
for (Field f : superCls.getDeclaredFields())
|
||||
for (Annotation annotation : f.getAnnotations()) {
|
||||
if (annotation.annotationType().equals(Param.class))
|
||||
parameters.put(f, (Param) annotation);
|
||||
if (annotation.annotationType().equals(DataPoint.class))
|
||||
datums.put(f,(DataPoint) annotation);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Reference in New Issue