Transitioned over to VE3 architecture.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5141 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2011-01-31 17:54:18 +00:00
parent 401feca90d
commit 3f387bc8d8
6 changed files with 29 additions and 436 deletions

View File

@ -6,7 +6,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvaluator;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.report.tags.Analysis;
import org.broadinstitute.sting.utils.report.tags.DataPoint;
@ -56,7 +56,7 @@ public class ACTransitionTable extends VariantEvaluator {
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( eval != null && ! initialized ) {
this.veWalker.getLogger().warn("Initializing...");
//this.veWalker.getLogger().warn("Initializing...");
initialize(eval);
initialized = true;
}
@ -112,13 +112,13 @@ public class ACTransitionTable extends VariantEvaluator {
}
public ACTransitionTable(VariantEvalWalker parent) {
super(parent);
//super(parent);
}
public void initialize(VariantContext vc) {
Set<String> permuteSamples = vc.getSampleNames();
permutations = new String[NUM_PERMUTATIONS][permuteSamples.size()];
veWalker.getLogger().warn(String.format("Num samples: %d",permuteSamples.size()));
//veWalker.getLogger().warn(String.format("Num samples: %d",permuteSamples.size()));
int offset = 0;
for ( String s : permuteSamples ) {
permutations[0][offset] = s;
@ -145,7 +145,7 @@ public class ACTransitionTable extends VariantEvaluator {
}
public void finalizeEvaluation() { // note: data points are null when this is called (wtf?)
veWalker.getLogger().info(String.format("Skipped: %d",skipped));
//veWalker.getLogger().info(String.format("Skipped: %d",skipped));
}
class TransitionTable implements TableType {

View File

@ -6,9 +6,9 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvaluator;
import org.broadinstitute.sting.utils.report.tags.Analysis;
import org.broadinstitute.sting.utils.report.tags.DataPoint;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
import org.broadinstitute.sting.utils.report.utils.TableType;
import org.broadinstitute.sting.utils.analysis.AminoAcid;
import org.broadinstitute.sting.utils.analysis.AminoAcidTable;
@ -53,7 +53,7 @@ public class AminoAcidTransition extends VariantEvaluator {
////////////////////////////////////////////////////////////
// a mapping from amino acid transition score histogram bin to Ti/Tv ratio
@DataPoint(name="Amino Acid Table", description = "TiTv counts by amino acid change")
@DataPoint(description = "TiTv counts by amino acid change")
AminoAcidTiTvTable acidTable = null;
class TiTvCount {
@ -129,8 +129,9 @@ public class AminoAcidTransition extends VariantEvaluator {
private AminoAcidTable lookup;
public AminoAcidTransition(VariantEvalWalker parent) {
super(parent);
enabled = parent.aminoAcidTransitionKey != null;
//super(parent);
//enabled = parent.aminoAcidTransitionKey != null;
enabled = true;
if ( enabled ) {
getParsingInformation(parent);
lookup = new AminoAcidTable();
@ -140,9 +141,14 @@ public class AminoAcidTransition extends VariantEvaluator {
private void getParsingInformation(VariantEvalWalker parent) {
if ( enabled() ) {
infoKey = parent.aminoAcidTransitionKey;
infoValueSplit = parent.aminoAcidTransitionSplit;
useCodons = parent.aatUseCodons;
// infoKey = parent.aminoAcidTransitionKey;
// infoValueSplit = parent.aminoAcidTransitionSplit;
// useCodons = parent.aatUseCodons;
infoKey = null;
infoValueSplit = null;
useCodons = false;
if ( infoKey == null ) {
throw new UserException.CommandLineException("No info-field key provided for amino acid tabulation. Please provide the appropriate key with -aatk.");
}
@ -180,7 +186,7 @@ public class AminoAcidTransition extends VariantEvaluator {
first = parsedNames [0];
second = parsedNames [1];
} catch (ArrayIndexOutOfBoundsException e) {
getLogger().warn("Error parsing variant context with value "+eval.getAttribute(infoKey));
//getLogger().warn("Error parsing variant context with value "+eval.getAttribute(infoKey));
}
AminoAcid reference;
AminoAcid alternate;

View File

@ -1,75 +0,0 @@
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.utils.report.tags.Analysis;
import java.util.ArrayList;
import java.util.List;
/**
 * Per-sample evaluator that counts sites by functional class. Each call to
 * {@link #getDataPoints()} yields one {@code FCPoint} column per functional class name, and
 * {@link #includeGenotype(Genotype)} selects unfiltered het or hom-var genotypes.
 */
@org.broadinstitute.sting.utils.report.tags.Analysis(name="FunctionalClassBySample",description="Count of SNPs by functional class by sample")
public class FunctionalClassBySample extends VariantEvaluatorBySample {

    public FunctionalClassBySample(VariantEvalWalker parent) {
        super(parent);
    }

    /**
     * Builds a fresh set of counters, one per functional class, in a fixed column order.
     *
     * @return a new list of ten {@code FCPoint} instances, each named after (and matching on) its functional class
     */
    public List<SampleDataPoint> getDataPoints() {
        // Column order matters: it determines the order of the columns in the output table.
        final String[] functionalClasses = {
            "miRNA",
            "3'-UTR",
            "Intron",
            "Splice_site",
            "Read-through",
            "Nonsense",
            "Missense",
            "Synonymous",
            "5'-UTR",
            "Promoter"
        };
        List<SampleDataPoint> columns = new ArrayList<SampleDataPoint>(functionalClasses.length);
        for ( String functionalClass : functionalClasses ) {
            // the column name and the string matched against are the same
            columns.add(new FCPoint(functionalClass, functionalClass));
        }
        return columns;
    }

    public String getTableName() {
        return "Functional Class Counts by Sample";
    }

    public String getName() {
        return "Functional Class Counts by Sample";
    }

    public int getComparisonOrder() {
        return 1;
    }

    public boolean enabled() {
        return true;
    }

    /**
     * @param g the genotype of one sample at the current site
     * @return true when the genotype is not filtered and is either het or hom-var
     */
    public boolean includeGenotype(Genotype g) {
        if ( g.isFiltered() ) {
            return false;
        }
        return g.isHet() || g.isHomVar();
    }
}
/**
 * Counter for a single functional class: incremented each time a variant context's
 * "type" attribute equals the configured match string, ignoring case.
 */
class FCPoint extends SampleDataPoint {
    private String matchStr;
    private int count;

    /**
     * @param fcName  name of this data point (passed to the superclass)
     * @param fcMatch value the "type" attribute is compared against
     */
    public FCPoint(String fcName, String fcMatch) {
        super(fcName);
        this.matchStr = fcMatch;
        this.count = 0;
    }

    /**
     * Increments the counter when {@code vc} is non-null and its "type" attribute
     * (defaulting to "none") matches this point's functional class, case-insensitively.
     */
    public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( vc == null ) {
            return;
        }
        final String functionalClass = vc.getAttributeAsString("type","none");
        if ( functionalClass.equalsIgnoreCase(matchStr) ) {
            count += 1;
        }
    }

    /** @return the current count formatted as a decimal integer */
    public String toString() {
        return String.format("%d", count);
    }
}

View File

@ -6,9 +6,9 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvaluator;
import org.broadinstitute.sting.utils.report.tags.Analysis;
import org.broadinstitute.sting.utils.report.tags.DataPoint;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
import org.broadinstitute.sting.utils.report.utils.TableType;
import java.util.Arrays;
@ -31,7 +31,7 @@ public class PrivatePermutations extends VariantEvaluator {
private boolean initialized = false;
private long skipped = 0l;
@DataPoint(name="Marginal Number of Mutations",description="Number of additional mutations from each new sample; random permutations")
@DataPoint(description="Number of additional mutations from each new sample; random permutations")
AdditionalBySample permuteCounts = null;
String[][] permutations;
@ -50,7 +50,7 @@ public class PrivatePermutations extends VariantEvaluator {
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( eval != null && ! initialized ) {
this.veWalker.getLogger().warn("Initializing...");
//this.veWalker.getLogger().warn("Initializing...");
initialize(eval);
initialized = true;
}
@ -100,13 +100,13 @@ public class PrivatePermutations extends VariantEvaluator {
}
public PrivatePermutations(VariantEvalWalker parent) {
super(parent);
//super(parent);
}
public void initialize(VariantContext vc) {
Set<String> permuteSamples = vc.getSampleNames();
permutations = new String[NUM_PERMUTATIONS][permuteSamples.size()];
veWalker.getLogger().warn(String.format("Num samples: %d",permuteSamples.size()));
//veWalker.getLogger().warn(String.format("Num samples: %d",permuteSamples.size()));
int offset = 0;
for ( String s : permuteSamples ) {
permutations[0][offset] = s;
@ -164,6 +164,6 @@ public class PrivatePermutations extends VariantEvaluator {
}
public void finalizeEvaluation() {
veWalker.getLogger().info(String.format("Skipped: %d",skipped));
//veWalker.getLogger().info(String.format("Skipped: %d",skipped));
}
}

View File

@ -1,138 +0,0 @@
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.utils.report.tags.Analysis;
import java.util.ArrayList;
import java.util.List;
/**
 * Per-sample evaluator reporting, for each sample, the SNP count, the Ti/Tv ratio, the
 * het/hom-var ratio and the comp overlap. Only unfiltered het or hom-var genotypes are
 * accepted by {@link #includeGenotype(Genotype)}.
 */
@Analysis(name = "Simple Metrics by Sample", description = "Variant counts, Ti/Tv, comp overlap; per sample")
public class SimpleMetricsBySample extends VariantEvaluatorBySample {

    public SimpleMetricsBySample(VariantEvalWalker parent) {
        super(parent);
    }

    /**
     * Builds a fresh set of per-sample metrics in fixed column order.
     *
     * @return a new list holding one instance each of the count, Ti/Tv, het/hom and comp-overlap points
     */
    public List<SampleDataPoint> getDataPoints() {
        // Typed (not raw) list, sized for the four metrics actually added below.
        List<SampleDataPoint> points = new ArrayList<SampleDataPoint>(4);
        points.add(new CountSNPsSample());
        points.add(new TiTvRatioSample());
        points.add(new HetHomRatioSample());
        points.add(new CompOverlapSample());
        return points;
    }

    public String getTableName() {
        return "SimpleMetricsBySample";
    }

    public String getName() {
        return "SimpleMetricsBySample";
    }

    public int getComparisonOrder() {
        return 2;
    }

    /**
     * @param g the genotype of one sample at the current site
     * @return true when the genotype is het or hom-var and is not filtered
     */
    public boolean includeGenotype(Genotype g) {
        return (g.isHet() || g.isHomVar()) && ! g.isFiltered();
    }

    public boolean enabled() {
        return true;
    }
}
/** Counts the non-null eval variant contexts that are SNPs. */
class CountSNPsSample extends SampleDataPoint {
    int numVariants = 0;

    public CountSNPsSample() {
        super("CountVariants");
    }

    /** Adds one to the tally when {@code vc} is present and is a SNP; {@code comp} is not consulted. */
    public void update2(VariantContext vc, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( vc == null || ! vc.isSNP() ) {
            return;
        }
        numVariants++;
    }

    /** @return the tally formatted as a decimal integer */
    public String toString() {
        return String.format("%d", numVariants);
    }
}
/** Tracks transition and transversion counts over biallelic SNPs and reports their ratio. */
class TiTvRatioSample extends SampleDataPoint {
    int nTi = 0;
    int nTv = 0;

    public TiTvRatioSample() {
        super("TiTvRatio");
    }

    /**
     * Classifies {@code vc} as a transition or transversion when it is a non-null biallelic SNP;
     * anything else is ignored.
     */
    public void update2(VariantContext vc, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( vc == null || ! vc.isSNP() || ! vc.isBiallelic() ) {
            return;
        }
        if ( ! VariantContextUtils.isTransition(vc) ) {
            nTv++;
        } else {
            nTi++;
        }
    }

    /** @return transitions divided by transversions (floating-point division), to two decimal places */
    public String toString() {
        final double ratio = ((double) nTi) / nTv;
        return String.format("%.2f", ratio);
    }
}
/** Tracks het and hom-var genotype counts for this point's sample and reports their ratio. */
class HetHomRatioSample extends SampleDataPoint {
    int nHet = 0;
    int nHomVar = 0;

    public HetHomRatioSample() {
        super("HetHomRatio");
    }

    /**
     * Looks up the genotype of {@code sampleName} in {@code vc} and bumps the het or hom-var
     * counter accordingly; a null context or null genotype is ignored.
     */
    public void update2(VariantContext vc, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( vc == null ) {
            return;
        }
        final Genotype genotype = vc.getGenotype(sampleName);
        if ( genotype == null ) {
            return;
        }
        if ( genotype.isHet() ) {
            nHet++;
        } else if ( genotype.isHomVar() ) {
            nHomVar++;
        }
    }

    /** @return het count divided by hom-var count (floating-point division), to two decimal places */
    public String toString() {
        final double ratio = ((double) nHet) / nHomVar;
        return String.format("%.2f", ratio);
    }
}
/** Counts sites where both the eval and the comp variant context are usable SNPs. */
class CompOverlapSample extends SampleDataPoint {
    int nOverlap = 0;

    public CompOverlapSample() {
        super("CompOverlap");
    }

    /**
     * Increments the overlap count when {@code comp} is a non-null, unfiltered SNP and
     * {@code eval} is a non-null SNP.
     */
    public void update2(VariantContext eval, VariantContext comp,RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        // Both flags are computed up front, in the same order as before.
        final boolean compUsable = comp != null && comp.isNotFiltered() && comp.isSNP();
        final boolean evalUsable = eval != null && eval.isSNP();
        if ( ! (compUsable && evalUsable) ) {
            return;
        }
        nOverlap++;
    }

    /** @return the overlap count formatted as a decimal integer */
    public String toString() {
        return String.format("%d", nOverlap);
    }
}

View File

@ -1,200 +0,0 @@
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvaluator;
import org.broadinstitute.sting.utils.report.tags.DataPoint;
import org.broadinstitute.sting.utils.report.utils.TableType;
import java.util.*;
/**
 * An abstract way to break variant analyses down by sample. SampleDataPoint objects (e.g. its inheritors) are propagated
 * into a per-sample table, which is updated only when a specific sample's genotype is such that the module-defined
 * includeGenotype(G) returns true.
 *
 * When the evaluator is not enabled, no table is created and every update/finalize call is a no-op.
 *
 * @author chartl
 */
public abstract class VariantEvaluatorBySample extends VariantEvaluator {
    @DataPoint(name="VariantEvaluatorBySample",description="Evaluation broken down by sample")
    EvalBySample evalBySample;

    public VariantEvaluatorBySample(VariantEvalWalker parent) {
        super(parent);
        evalBySample = initializeTable();
    }

    /** @return the name given to the per-sample table */
    public abstract String getTableName();

    /** @return a fresh list of data points; called once for the table header and once per newly seen sample */
    public abstract List<SampleDataPoint> getDataPoints();

    /** @return true if a site should be counted for the sample carrying genotype {@code g} */
    public abstract boolean includeGenotype(Genotype g);

    /**
     * Creates the per-sample table.
     *
     * @return a new table when this evaluator is enabled, otherwise null
     */
    public EvalBySample initializeTable() {
        if ( ! enabled() ) {
            return null;
        }
        return new EvalBySample(getTableName(), getDataPoints());
    }

    // note -- this only updates at all sites after the first site where a sample has been identified containing a variant genotype
    public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( evalBySample == null ) {
            return; // evaluator disabled: there is no table to update
        }
        for ( List<SampleDataPoint> points : evalBySample.sampleAndEvalResults.values() ) {
            for ( SampleDataPoint dp : points ) {
                dp.update0(tracker,ref,context);
            }
        }
    }

    /**
     * Forwards a single-context update to the data points of every sample whose genotype is included.
     *
     * @return always null (no interesting sites are reported)
     */
    public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( vc1 == null || evalBySample == null ) {
            return null; // cannot update by sample if there are no samples (or no table)
        }

        for ( String sample : vc1.getSampleNames() ) {
            if ( includeGenotype(vc1.getGenotype(sample)) ) {
                for ( SampleDataPoint dp : dataPointsFor(sample) ) {
                    dp.update1(vc1,tracker,ref,context);
                }
            }
        }

        return null; // don't return interesting sites
    }

    /**
     * Forwards an eval/comp update to the data points of every sample whose genotype in {@code vc1} is included.
     *
     * @return always null (no interesting sites are reported)
     */
    public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( vc1 == null || evalBySample == null ) {
            return null; // cannot update by sample if there are no samples (or no table)
        }

        for ( String sample : vc1.getSampleNames() ) {
            if ( includeGenotype(vc1.getGenotype(sample)) ) {
                for ( SampleDataPoint dp : dataPointsFor(sample) ) {
                    dp.update2(vc1,vc2,tracker,ref,context);
                }
            }
        }

        return null; // don't return interesting sites
    }

    @Override
    public void finalizeEvaluation() {
        if ( evalBySample != null ) {
            evalBySample.finalizeTable();
        }
    }

    /** Returns the data points registered for {@code sample}, registering a fresh set on first sight. */
    private List<SampleDataPoint> dataPointsFor(String sample) {
        if ( ! evalBySample.containsKey(sample) ) {
            evalBySample.put(sample,getDataPoints());
        }
        return evalBySample.sampleAndEvalResults.get(sample);
    }
}
/**
 * Base type for one per-sample metric. It carries the metric's name and the sample it is
 * attached to, and exposes no-op update hooks that subclasses override as needed. The value
 * of the metric is rendered through {@link #toString()}.
 */
abstract class SampleDataPoint {
    public String name;
    public String sampleName;

    /** @param name the name of this data point */
    public SampleDataPoint(String name) {
        this.name = name;
    }

    /** @return the name supplied at construction */
    public String getName() {
        return this.name;
    }

    /** @param sName the sample this data point is attached to */
    public void setSampleName(String sName) {
        this.sampleName = sName;
    }

    /** @return the textual value of this data point */
    public abstract String toString();

    /** Update hook taking no variant context; does nothing by default. */
    public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    }

    /** Update hook taking a single variant context; does nothing by default. */
    public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    }

    /** Update hook taking an eval and a comp variant context; does nothing by default. */
    public void update2(VariantContext eval, VariantContext comp,RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    }

    /** Hook called on each point when the table is finalized; does nothing by default. */
    public void finalizeCalculation() {
    }
}
/**
 * Table of per-sample results: one row per sample (sorted by sample name) and one column per
 * data point name. Cells become readable through {@link #getCell(int, int)} once
 * {@link #finalizeTable()} has been called.
 */
class EvalBySample implements TableType {
    public String[] evalNames;
    public TreeMap<String, List<SampleDataPoint>> sampleAndEvalResults;
    public String name;
    private HashMap<String,Integer> nameToDataPointOffset;
    private Object[][] finalizedResults;

    /**
     * @param name  the table name
     * @param evals data points whose names, in iteration order, become the column keys
     */
    public EvalBySample(String name, Collection<SampleDataPoint> evals) {
        final int numColumns = evals.size();
        this.evalNames = new String[numColumns];
        this.nameToDataPointOffset = new HashMap<String,Integer>(numColumns);

        int column = 0;
        for ( SampleDataPoint point : evals ) {
            final String pointName = point.getName();
            this.evalNames[column] = pointName;
            this.nameToDataPointOffset.put(pointName, column);
            column++;
        }

        this.name = name;
        this.sampleAndEvalResults = new TreeMap<String,List<SampleDataPoint>>();
    }

    /** @return the data point names, in column order */
    public Object[] getColumnKeys() {
        return evalNames;
    }

    /** @return the string form of the finalized data point at row {@code x}, column {@code y} */
    public String getCell(int x, int y) {
        final Object cell = finalizedResults[x][y];
        return cell.toString();
    }

    public String getName() {
        return name;
    }

    /** @return the sample names, in the map's ascending key order */
    public Object[] getRowKeys() {
        return sampleAndEvalResults.keySet().toArray(new String[sampleAndEvalResults.size()]);
    }

    /**
     * Finalizes every data point and snapshots them into the cell matrix, one row per sample.
     * With no samples the matrix is empty.
     */
    public void finalizeTable() {
        if ( sampleAndEvalResults == null || sampleAndEvalResults.isEmpty() ) {
            finalizedResults = new Object[0][0];
            return; // todo -- early return is hacky
        }

        // the column count is taken from the first sample's list of data points
        final int numRows = sampleAndEvalResults.size();
        final int numColumns = sampleAndEvalResults.firstEntry().getValue().size();
        finalizedResults = new Object[numRows][numColumns];

        int row = 0;
        for ( List<SampleDataPoint> samplePoints : sampleAndEvalResults.values() ) {
            int column = 0;
            for ( SampleDataPoint point : samplePoints ) {
                point.finalizeCalculation();
                finalizedResults[row][column] = point;
                column++;
            }
            row++;
        }
    }

    /** @return true if a row already exists for {@code sample} */
    public boolean containsKey(String sample) {
        return sampleAndEvalResults.containsKey(sample);
    }

    /** Tags each data point with {@code sample}, then stores the list as that sample's row. */
    public void put(String sample, List<SampleDataPoint> dataPoints) {
        for ( SampleDataPoint point : dataPoints ) {
            point.setSampleName(sample);
        }
        sampleAndEvalResults.put(sample, dataPoints);
    }
}