VariantEval2 test framework implemented; Kiran is experimenting with the system. Not for use by anyone else. VariantContext appears to work well; I'll release it next week for general use following docs of the functions. Removing newvarianteval and other classes to avoid any future confusion. Update to TraverseLoci and RodLocusView to simplify a few functions and to correct some minor errors. All tests pass without modification.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2748 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-01-30 20:51:24 +00:00
parent 236764b249
commit 3d45457595
15 changed files with 469 additions and 312 deletions

View File

@ -159,17 +159,9 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
* @return
*/
private long getSkippedBases( GenomeLoc currentPos ) {
long skippedBases = 0;
if ( lastLoc == null ) {
// special case -- we're at the start
//System.out.printf("Cur=%s, shard=%s%n", currentPos, shard.getGenomeLoc());
GenomeLoc firstLoc = shard.getGenomeLocs().get(0);
skippedBases = currentPos.getStart() - firstLoc.getStart();
} else {
//System.out.printf("Cur=%s, last=%s%n", currentPos, lastLoc);
skippedBases = currentPos.minus(lastLoc) - 1;
}
// the minus - is because if lastLoc == null, you haven't yet seen anything in this interval, so it should also be counted as skipped
Long compStop = lastLoc == null ? shard.getGenomeLocs().get(0).getStart() - 1 : lastLoc.getStop();
long skippedBases = currentPos.getStart() - compStop - 1;
if ( skippedBases < -1 ) { // minus 1 value is ok
throw new RuntimeException(String.format("BUG: skipped bases=%d is < 0: cur=%s vs. last=%s, shard=%s",

View File

@ -109,7 +109,6 @@ public class TraverseLoci extends TraversalEngine {
RodLocusView rodLocusView = (RodLocusView)locusView;
long nSkipped = rodLocusView.getLastSkippedBases();
if ( nSkipped > 0 ) {
// no sense in making the call if you don't have anything interesting to say
GenomeLoc site = rodLocusView.getLocOneBeyondShard();
AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileup(site), nSkipped);
M x = locusWalker.map(null, null, ac);

View File

@ -23,7 +23,7 @@ public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
for (ReferenceOrderedDatum d : dbsnpList) {
rodDbSNP dbsnpRecord = (rodDbSNP)d;
if ( dbsnpRecord.getLocation().getStart() == context.getLocation().getStart() ) {
VariantContext vc = VariantContextAdaptors.dbsnpToVariantContext(dbsnpRecord);
VariantContext vc = VariantContextAdaptors.convertToVariantContext(dbsnpRecord);
if ( vc != null ) {
n++;
System.out.printf("%s%n", vc);
@ -40,7 +40,7 @@ public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
int n = 0;
for (ReferenceOrderedDatum d : vcfList) {
RodVCF vcfRecord = (RodVCF)d;
VariantContext vc = VariantContextAdaptors.vcfToVariantContext(vcfRecord);
VariantContext vc = VariantContextAdaptors.convertToVariantContext(vcfRecord);
if ( vc != null ) {
n++;
System.out.printf("%s%n", vc);

View File

@ -17,10 +17,10 @@ import java.util.*;
*/
public class VariantContext extends AttributedObject {
private GenomeLoc loc;
Type type = Type.UNDETERMINED;
private Type type = Type.UNDETERMINED;
private Set<Allele> alleles = new HashSet<Allele>();
private Map<String, Genotype> genotypes = new HashMap<String, Genotype>();
Set<Object> filters = new HashSet<Object>();
private Set<Object> filters = new HashSet<Object>();
// ---------------------------------------------------------------------------------------------------------
//

View File

@ -2,20 +2,39 @@ package org.broadinstitute.sting.oneoffprojects.variantcontext;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import java.util.*;
public class VariantContextAdaptors {
public static VariantContext dbsnpToVariantContext(rodDbSNP dbsnp) {
public static boolean canBeConvertedToVariantContext(Object variantContainingObject) {
return convertToVariantContext(variantContainingObject) != null;
}
public static VariantContext convertToVariantContext(Object variantContainingObject) {
if ( variantContainingObject instanceof rodDbSNP )
return dbsnpToVariantContext((rodDbSNP)variantContainingObject);
else if ( variantContainingObject instanceof RodVCF )
return vcfToVariantContext(((RodVCF)variantContainingObject).getRecord());
else if ( variantContainingObject instanceof VCFRecord )
return vcfToVariantContext((VCFRecord)variantContainingObject);
else
return null;
//throw new IllegalArgumentException("Cannot convert object " + variantContainingObject + " of class " + variantContainingObject.getClass() + " to a variant context");
}
private static VariantContext dbsnpToVariantContext(rodDbSNP dbsnp) {
if ( dbsnp.isSNP() || dbsnp.isIndel() || dbsnp.varType.contains("mixed") ) {
VariantContext vc = new VariantContext(dbsnp.getLocation());
// add the reference allele
if ( ! Allele.acceptableAlleleBases(dbsnp.getReference()) ) {
System.out.printf("Excluding dbsnp record %s%n", dbsnp);
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
return null;
}
@ -25,7 +44,7 @@ public class VariantContextAdaptors {
// add all of the alt alleles
for ( String alt : dbsnp.getAlternateAlleleList() ) {
if ( ! Allele.acceptableAlleleBases(alt) ) {
System.out.printf("Excluding dbsnp record %s%n", dbsnp);
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
return null;
}
vc.addAllele(new Allele(alt, false));
@ -37,7 +56,7 @@ public class VariantContextAdaptors {
return null; // can't handle anything else
}
public static VariantContext vcfToVariantContext(RodVCF vcf) {
private static VariantContext vcfToVariantContext(VCFRecord vcf) {
if ( vcf.isSNP() || vcf.isIndel() ) {
VariantContext vc = new VariantContext(vcf.getLocation());

View File

@ -2,9 +2,43 @@ package org.broadinstitute.sting.oneoffprojects.variantcontext;
import java.util.*;
import org.apache.commons.jexl.*;
import org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2.VariantEvaluator;
import org.broadinstitute.sting.utils.StingException;
public class VariantContextUtils {
/** */
public static class MatchExp {
String name;
String expStr;
Expression exp;
public MatchExp(String name, String str, Expression exp) {
this.name = name;
this.expStr = str;
this.exp = exp;
}
}
public static List<MatchExp> initializeMatchExps(Map<String, String> names_and_exps) {
List<MatchExp> exps = new ArrayList<MatchExp>();
for ( Map.Entry<String, String> elt : names_and_exps.entrySet() ) {
String name = elt.getKey();
String expStr = elt.getValue();
if ( name == null || expStr == null ) throw new IllegalArgumentException("Cannot create null expressions : " + name + " " + expStr);
try {
Expression filterExpression = ExpressionFactory.createExpression(expStr);
exps.add(new MatchExp(name, expStr, filterExpression));
} catch (Exception e) {
throw new StingException("Invalid expression used (" + expStr + "). Please see the JEXL docs for correct syntax.");
}
}
return exps;
}
private static final String UNIQUIFIED_SUFFIX = ".unique";
/**

View File

@ -0,0 +1,113 @@
package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext;
import org.broadinstitute.sting.oneoffprojects.variantcontext.Genotype;
import org.broadinstitute.sting.utils.StingException;
import java.util.List;
import java.util.Arrays;
public class CountVariants extends VariantEvaluator {
int nProcessedLoci = 0;
int nCalledLoci = 0;
int nVariantLoci = 0;
int nRefLoci = 0;
int nSNPs = 0;
int nInsertions = 0;
int nDeletions = 0;
int nComplex = 0;
int nNoCalls = 0;
int nHets = 0;
int nHomRef = 0;
int nHomVar = 0;
private double rate(long n) {
return n / (1.0 * Math.max(nProcessedLoci, 1));
}
private long inverseRate(long n) {
return n == 0 ? 0 : nProcessedLoci / Math.max(n, 1);
}
private double ratio(long num, long denom) {
return ((double)num) / (Math.max(denom, 1));
}
public String getName() {
return "Counter";
}
public int getComparisonOrder() {
return 1; // we only need to see each eval track
}
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
nProcessedLoci += context.getSkippedBases() + 1;
}
public void update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
//nProcessedLoci++;
nCalledLoci++;
if ( vc1.isVariant() ) nVariantLoci++;
switch ( vc1.getType() ) {
case NO_VARIATION: nRefLoci++; break;
case SNP: nSNPs++; break;
case INDEL:
if ( vc1.isInsertion() ) nInsertions++; else nDeletions++;
break;
case MIXED: nComplex++; break;
}
for ( Genotype g : vc1.getGenotypes().values() ) {
switch ( g.getType() ) {
case NO_CALL: nNoCalls++; break;
case HOM_REF: nHomRef++; break;
case HET: nHets++; break;
case HOM_VAR: nHomVar++; break;
default: throw new StingException("BUG: Unexpected genotype type: " + g);
}
}
}
public String toString() {
return "Counter: " + summaryLine();
}
private String summaryLine() {
return String.format("%d %d %d %d " +
"%.2e %d " +
"%d %d %d %d " +
"%d %d %d " +
"%.2e %d %.2f " +
"%.2f %d %.2f",
nProcessedLoci, nCalledLoci, nRefLoci, nVariantLoci,
rate(nVariantLoci), inverseRate(nVariantLoci),
nSNPs, nDeletions, nInsertions, nComplex,
nHomRef, nHets, nHomVar,
rate(nHets), inverseRate(nHets), ratio(nHets, nHomVar),
rate(nDeletions + nInsertions), inverseRate(nDeletions + nInsertions), ratio(nDeletions, nInsertions));
}
private static List<String> HEADER =
Arrays.asList("nProcessedLoci", "nCalledLoci", "nRefLoci", "nVariantLoci",
"variantRate", "variantRatePerBp",
"nSNPs", "nDeletions", "nInsertions", "nComplex",
"nHomRefGenotypes", "nHetGenotypes", "nHomVarGenotypes",
"heterozygosity", "heterozygosityPerBp", "hetHomRatio",
"indelRate", "indelRatePerBp", "deletionInsertionRatio");
// making it a table
public List<String> getTableHeader() {
return HEADER;
}
public List<List<String>> getTableRows() {
return Arrays.asList(Arrays.asList(summaryLine().split(" ")));
}
}

View File

@ -0,0 +1,241 @@
package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext;
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContextAdaptors;
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContextUtils;
import java.util.*;
/**
* Test routine for new VariantContext object
*/
public class VariantEval2Walker extends RodWalker<Integer, Integer> {
// todo -- add doc string
@Argument(shortName="select", doc="", required=false)
protected String[] SELECT_STRINGS = {};
// todo -- add doc string
@Argument(shortName="selectName", doc="", required=false)
protected String[] SELECT_NAMES = {};
@Argument(shortName="known", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false)
protected String[] KNOWN_NAMES = {"dbsnp"};
private class EvaluationGroup extends HashMap<String, Set<VariantEvaluator>> {
// useful for typing
}
// todo -- generalize to multiple contexts, one for each eval
private HashMap<String, EvaluationGroup> contexts = new HashMap<String, EvaluationGroup>();
private List<VariantContextUtils.MatchExp> selectExps = null;
public void initialize() {
if ( SELECT_NAMES.length != SELECT_STRINGS.length )
throw new StingException("Inconsistent number of provided filter names and expressions.");
Map<String, String> map = new HashMap<String, String>();
for ( int i = 0; i < SELECT_NAMES.length; i++ ) { map.put(SELECT_NAMES[i], SELECT_STRINGS[i]); }
selectExps = VariantContextUtils.initializeMatchExps(map);
// setup contexts
// todo -- add selects
contexts.put("eval", createEvaluationGroup());
contexts.put("eval.filtered", createEvaluationGroup());
}
private EvaluationGroup createEvaluationGroup() {
EvaluationGroup group = new EvaluationGroup();
for ( String name : Arrays.asList("all", "known", "novel") ) {
group.put(name, new HashSet<VariantEvaluator>(Arrays.asList(new CountVariants())));
}
return group;
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
//System.out.printf("map at %s with %d skipped%n", context.getLocation(), context.getSkippedBases());
Map<String, VariantContext> allNamedVCs = getVariantContexts(context, tracker);
Map<String, VariantContext> evalNamedVCs = getEvalVCs(allNamedVCs);
Map<String, VariantContext> comps = getComparisonVCs(allNamedVCs);
for ( VariantEvaluator eval : getAllEvaluations() ) {
eval.update0(tracker, ref, context);
}
if ( evalNamedVCs.size() > 1 ) throw new StingException("VariantEval doesn't yet support for multiple independent eval tracks");
for ( Map.Entry<String, VariantContext> evalNameVC: evalNamedVCs.entrySet() ) {
String name = evalNameVC.getKey();
VariantContext vc = evalNameVC.getValue();
boolean isKnown = vcIsKnown(vc, allNamedVCs);
if ( vc.isFiltered() ) name = name + ".filtered";
EvaluationGroup group = contexts.get(name);
for ( Set<VariantEvaluator> evaluations : Arrays.asList(group.get("all"), group.get(isKnown ? "known" : "novel")) ) {
for ( VariantEvaluator evaluation : evaluations ) {
switch ( evaluation.getComparisonOrder() ) {
case 1:
evaluation.update1(vc, tracker, ref, context);
break;
case 2:
for ( VariantContext comp : comps.values() ) {
evaluation.update2(vc, comp, tracker, ref, context);
}
break;
default:
throw new StingException("BUG: Unexpected evaluation order " + evaluation);
}
}
}
}
return 0;
}
private boolean vcIsKnown(VariantContext vc, Map<String, VariantContext> vcs) {
for ( VariantContext known : getKnownVCs(vcs).values() ) {
if ( known.isNotFiltered() && known.getType() == vc.getType() )
return true;
}
return false;
}
private Map<String, VariantContext> getEvalVCs(Map<String, VariantContext> vcs) {
return getVCsStartingWith(vcs, false);
}
private Map<String, VariantContext> getComparisonVCs(Map<String, VariantContext> vcs) {
return getVCsStartingWith(vcs, true);
}
private Map<String, VariantContext> getVCsStartingWith(Map<String, VariantContext> vcs, boolean notStartsWith) {
Map<String, VariantContext> map = new HashMap<String, VariantContext>();
for ( Map.Entry<String, VariantContext> elt : vcs.entrySet() ) {
boolean startP = elt.getKey().startsWith("eval");
if ( (startP && ! notStartsWith) || (!startP && notStartsWith) ) {
map.put(elt.getKey(), elt.getValue());
}
}
return map;
}
private Map<String, VariantContext> getKnownVCs(Map<String, VariantContext> vcs) {
Map<String, VariantContext> map = new HashMap<String, VariantContext>();
for ( Map.Entry<String, VariantContext> elt : vcs.entrySet() ) {
for ( String known1 : KNOWN_NAMES ) {
if ( elt.getKey().equals(known1) ) {
map.put(elt.getKey(), elt.getValue());
}
}
}
return map;
}
private List<String> getAllEvaluationNames() {
List<String> names = new ArrayList<String>();
if ( contexts.size() == 0 ) throw new IllegalStateException("Contexts shouldn't be sized 0 when calling getAllEvaluationNames()");
for ( VariantEvaluator eval : contexts.values().iterator().next().get("all") ) {
names.add(eval.getName());
}
return names;
}
private Map<String, VariantContext> getVariantContexts(AlignmentContext context, RefMetaDataTracker tracker) {
Map<String, VariantContext> map = new HashMap<String, VariantContext>();
if ( tracker != null ) {
for ( RODRecordList<ReferenceOrderedDatum> rodList : tracker.getBoundRodTracks() ) {
boolean alreadyGrabbedOne = false;
for ( ReferenceOrderedDatum rec : rodList.getRecords() ) {
if ( rec.getLocation().getStart() == context.getLocation().getStart() ) {
// ignore things that span this location but started earlier
if ( alreadyGrabbedOne ) {
// can't handle this situation
;
//logger.info(String.format("Ignore second+ events at locus %s in rod %s => rec is %s", context.getLocation(), rodList.getName(), rec));
} else {
VariantContext vc = VariantContextAdaptors.convertToVariantContext(rec);
if ( vc != null ) {
alreadyGrabbedOne = true;
map.put(rec.getName(), vc);
}
}
}
}
}
}
return map;
}
public Integer reduceInit() {
return 0;
}
public Integer reduce(Integer point, Integer sum) {
return point + sum;
}
public VariantEvaluator getEvalByName(String name, Set<VariantEvaluator> s) {
for ( VariantEvaluator e : s )
if ( e.getName().equals(name) )
return e;
return null;
}
public List<VariantEvaluator> getAllEvaluations() {
List<VariantEvaluator> l = new ArrayList<VariantEvaluator>();
for ( EvaluationGroup group : contexts.values() ) {
for ( Set<VariantEvaluator> evals : group.values() ) {
l.addAll(evals);
}
}
return l;
}
public void onTraversalDone(Integer result) {
for ( String evalName : getAllEvaluationNames() ) {
boolean first = true;
for ( Map.Entry<String, EvaluationGroup> elt : contexts.entrySet() ) {
String contextName = elt.getKey();
EvaluationGroup group = elt.getValue();
for ( Map.Entry<String, Set<VariantEvaluator>> namedEvalGroup : group.entrySet() ) {
String evalSubgroupName = namedEvalGroup.getKey();
VariantEvaluator eval = getEvalByName(evalName, namedEvalGroup.getValue());
String keyWord = contextName + "." + evalSubgroupName;
if ( first ) {
out.printf("%s\t%s\t%s%n", evalName, "context", Utils.join("\t", eval.getTableHeader()));
first = false;
}
for ( List<String> row : eval.getTableRows() )
out.printf("%s\t%s\t%s%n", evalName, keyWord, Utils.join("\t", row));
}
}
}
}
}

View File

@ -0,0 +1,49 @@
package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2;
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import java.util.List;
import java.util.ArrayList;
/**
* Created by IntelliJ IDEA.
* User: depristo
* Date: Jan 29, 2010
* Time: 3:38:02 PM
* To change this template use File | Settings | File Templates.
*/
public abstract class VariantEvaluator {
protected boolean accumulateInterestingSites = false;
protected boolean processedASite = false;
protected List<VariantContext> interestingSites = new ArrayList<VariantContext>();
public abstract String getName();
// do we want to keep track of things that are interesting
public void accumulateInterestingSites(boolean enable) { accumulateInterestingSites = enable; }
public boolean isAccumulatingInterestingSites() { return accumulateInterestingSites; }
public List<VariantContext> getInterestingSites() { return interestingSites; }
protected void addInterestingSite(String why, VariantContext vc) { interestingSites.add(vc); }
public boolean processedAnySites() { return processedASite; }
protected void markSiteAsProcessed() { processedASite = true; }
/** Should return the number of VariantContexts expected as inputs to update. Can be 1 or 2 */
public abstract int getComparisonOrder();
/** called at all sites, regardless of eval context itself; useful for counting processed bases */
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { }
public void update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { }
public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { }
public void finalize() {}
public abstract String toString();
// making it a table
public abstract List<String> getTableHeader();
public abstract List<List<String>> getTableRows();
}

View File

@ -8,23 +8,10 @@ import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.PackageUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.playground.gatk.walkers.varianteval.VariantAnalysis;
import org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval.VariantEvaluation;
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext;
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContextAdaptors;
import org.apache.commons.jexl.ExpressionFactory;
import org.apache.commons.jexl.Expression;
import org.apache.commons.jexl.JexlHelper;
import org.apache.commons.jexl.JexlContext;
import java.util.*;
@ -60,7 +47,7 @@ public class SNPDensity extends RefWalker<Pair<VariantContext, GenomeLoc>, SNPDe
if (vcfList != null) {
for (ReferenceOrderedDatum d : vcfList) {
RodVCF vcfRecord = (RodVCF)d;
vc = VariantContextAdaptors.vcfToVariantContext(vcfRecord);
vc = VariantContextAdaptors.convertToVariantContext(vcfRecord);
break;
}
}

View File

@ -1,42 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import java.util.List;
import java.util.ArrayList;
public class CounterEvaluation implements VariantEvaluation {
private int numVariants = 0;
private int numCalls = 0;
private int numKnown = 0;
public CounterEvaluation() {}
public void update(RodVCF variant, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
numVariants += (variant != null && variant.isSNP()) ? 1 : 0;
numKnown += (variant != null && variant.isSNP() && !variant.getID().equals(".")) ? 1 : 0;
numCalls++;
}
public String getName() {
return "Counter";
}
public List<String> getResult() {
ArrayList<String> results = new ArrayList<String>();
double variantRate = ((double) numVariants)/((double) numCalls);
double knownVariantsPct = 100.0*((double) numKnown)/((double) numVariants);
results.add(String.format("regionSize=%d", numCalls));
results.add(String.format("variants=%d", numVariants));
results.add(String.format("variantsKnown=%d", numKnown));
results.add(String.format("knownVariantsPct=%f", knownVariantsPct));
results.add(String.format("variantRate=1/%d", Math.round(1.0/variantRate)));
return results;
}
}

View File

@ -1,153 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.RMD;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.PackageUtils;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.playground.gatk.walkers.varianteval.VariantAnalysis;
import org.apache.commons.jexl.ExpressionFactory;
import org.apache.commons.jexl.Expression;
import org.apache.commons.jexl.JexlHelper;
import org.apache.commons.jexl.JexlContext;
import java.util.*;
@Requires(value={},referenceMetaData=@RMD(name="eval",type=RodVCF.class))
public class NewVariantEval extends RodWalker<Integer, Integer> {
@Argument(fullName="filterExpression", shortName="filter", doc="Expression used with INFO fields to filter (see wiki docs for more info)", required=false)
private String FILTER_STRING = "FILTER == false";
@Argument(fullName="sampleName", shortName="sample", doc="If the VCF file has multiple samples, evaluate only the specified sample", required=false)
private String SAMPLE_NAME = null;
private Expression filterExpression;
private HashSet<VariantEvaluation> evals;
public void initialize() {
try {
filterExpression = ExpressionFactory.createExpression(FILTER_STRING);
} catch (Exception e) {
throw new StingException("Invalid expression used (" + FILTER_STRING + "). Please see the JEXL docs for correct syntax.");
}
try {
evals = new HashSet<VariantEvaluation>();
List<Class<? extends VariantEvaluation>> cevals = PackageUtils.getClassesImplementingInterface(VariantEvaluation.class);
for (Class ceval : cevals) {
out.printf("Analysis: %s%n", ceval.getName());
VariantEvaluation eval = (VariantEvaluation) ceval.newInstance();
evals.add(eval);
}
} catch (InstantiationException e) {
throw new StingException(e.getMessage());
} catch (IllegalAccessException e) {
throw new StingException(e.getMessage());
}
}
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
RODRecordList<ReferenceOrderedDatum> rods = tracker.getTrackData("eval", null);
if ( rods != null ) {
RodVCF variant = (RodVCF) rods.getRecords().get(0);
Map<String, String> infoMap = new HashMap<String, String>(variant.mCurrentRecord.getInfoValues());
infoMap.put("QUAL", String.valueOf(variant.mCurrentRecord.getQual()));
infoMap.put("FILTER", String.valueOf(variant.mCurrentRecord.isFiltered()));
infoMap.put("ID", String.valueOf(variant.mCurrentRecord.getID()));
for (String filterCode : variant.mCurrentRecord.getFilteringCodes()) {
infoMap.put(filterCode, "1");
}
JexlContext jContext = JexlHelper.createContext();
jContext.setVars(infoMap);
try {
return (Boolean) filterExpression.evaluate(jContext);
} catch (Exception e) {
throw new StingException(e.getMessage());
}
}
return true;
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
RODRecordList<ReferenceOrderedDatum> rods = tracker.getTrackData("eval", null);
RodVCF variant = (rods != null) ? (RodVCF) rods.getRecords().get(0) : null;
if (variant != null && SAMPLE_NAME != null && variant.hasGenotypeData()) {
variant = selectSample(SAMPLE_NAME, variant);
if (variant == null) {
throw new StingException(String.format("The sample '%s' is not present in the specified VCF", SAMPLE_NAME));
}
}
for (VariantEvaluation eval : evals) {
eval.update(variant, tracker, ref, context);
}
return 1;
}
private RodVCF selectSample(String sample, RodVCF variant) {
String[] samples = variant.getSampleNames();
for (int i = 0; i < samples.length; i++) {
if (samples[i].equalsIgnoreCase(sample)) {
List<VCFGenotypeRecord> genotypeRecs = new ArrayList<VCFGenotypeRecord>();
genotypeRecs.add(variant.mCurrentRecord.getVCFGenotypeRecords().get(i));
variant.mCurrentRecord = new VCFRecord(variant.getReferenceForSNP(),
variant.getLocation().getContig(),
(int) variant.getLocation().getStart(),
variant.getID(),
variant.mCurrentRecord.getAlternateAlleles(),
variant.getQual(),
variant.getFilterString(),
variant.getInfoValues(),
variant.mCurrentRecord.getGenotypeFormatString(),
genotypeRecs);
return variant;
}
}
return null;
}
public Integer reduceInit() {
return null;
}
public Integer reduce(Integer value, Integer sum) {
return null;
}
public void onTraversalDone(Integer sum) {
for (VariantEvaluation eval : evals) {
List<String> results = eval.getResult();
for (String result : results) {
out.printf("%s:%s%n", eval.getName(), result);
}
}
}
}

View File

@ -1,65 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.playground.utils.NamedTable;
import org.broadinstitute.sting.utils.BaseUtils;
import java.util.List;
import java.util.ArrayList;
public class SubstitutionEvaluation implements VariantEvaluation {
private NamedTable substitutions;
public SubstitutionEvaluation() {
substitutions = new NamedTable();
}
public void update(RodVCF variant, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (variant != null) {
List<String> altAlleles = variant.getAlternateAlleleList();
String altAllele = altAlleles.get(0);
String refAllele = String.format("%c", BaseUtils.baseIndexToSimpleBase(ref.getBaseIndex()));
substitutions.increment(refAllele, altAllele);
}
}
public String getName() {
return "Substitution";
}
public List<String> getResult() {
ArrayList<String> results = new ArrayList<String>();
int transitions = 0;
int transversions = 0;
int unknown = 0;
for (char rbase : BaseUtils.BASES) {
String refAllele = String.format("%c", rbase);
for (char abase : BaseUtils.BASES) {
String altAllele = String.format("%c", abase);
if (BaseUtils.isTransition((byte)rbase, (byte)abase)) {
transitions += substitutions.get(refAllele, altAllele);
} else if (BaseUtils.isTransversion((byte)rbase, (byte)abase)) {
transversions += substitutions.get(refAllele, altAllele);
} else {
unknown += substitutions.get(refAllele, altAllele);
}
}
}
results.add(String.format("%n%s", substitutions.toString()));
results.add(String.format("transitions=%d", transitions));
results.add(String.format("transversions=%d", transversions));
results.add(String.format("unknown=%d", unknown));
results.add(String.format("titv=%f", ((double) transitions)/((double) transversions)));
return results;
}
}

View File

@ -1,18 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import java.util.List;
public interface VariantEvaluation {
public void update(RodVCF variant, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context);
//public Integer map(
public String getName();
public List<String> getResult();
}

View File

@ -257,6 +257,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
//System.out.printf("Tracker at %s is %s, ref is %s, skip is %d mapped is %d%n", context.getLocation(), tracker, ref, context.getSkippedBases(), nMappedSites);
nMappedSites += context.getSkippedBases();
//System.out.printf("Tracker at %s is %s, ref is %s%n", context.getLocation(), tracker, ref);