VariantEval2 test framework implemented; Kiran is experimenting with the system. Not for use by anyone else. VariantContext appears to work well; I'll release it next week for general use following docs of the functions. Removing newvarianteval and other classes to avoid any future confusion. Update to TraverseLoci and RodLocusView to simplify a few functions and to correct some minor errors. All tests pass without modification.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2748 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
236764b249
commit
3d45457595
|
|
@ -159,17 +159,9 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
* @return
|
||||
*/
|
||||
private long getSkippedBases( GenomeLoc currentPos ) {
|
||||
long skippedBases = 0;
|
||||
|
||||
if ( lastLoc == null ) {
|
||||
// special case -- we're at the start
|
||||
//System.out.printf("Cur=%s, shard=%s%n", currentPos, shard.getGenomeLoc());
|
||||
GenomeLoc firstLoc = shard.getGenomeLocs().get(0);
|
||||
skippedBases = currentPos.getStart() - firstLoc.getStart();
|
||||
} else {
|
||||
//System.out.printf("Cur=%s, last=%s%n", currentPos, lastLoc);
|
||||
skippedBases = currentPos.minus(lastLoc) - 1;
|
||||
}
|
||||
// the minus - is because if lastLoc == null, you haven't yet seen anything in this interval, so it should also be counted as skipped
|
||||
Long compStop = lastLoc == null ? shard.getGenomeLocs().get(0).getStart() - 1 : lastLoc.getStop();
|
||||
long skippedBases = currentPos.getStart() - compStop - 1;
|
||||
|
||||
if ( skippedBases < -1 ) { // minus 1 value is ok
|
||||
throw new RuntimeException(String.format("BUG: skipped bases=%d is < 0: cur=%s vs. last=%s, shard=%s",
|
||||
|
|
|
|||
|
|
@ -109,7 +109,6 @@ public class TraverseLoci extends TraversalEngine {
|
|||
RodLocusView rodLocusView = (RodLocusView)locusView;
|
||||
long nSkipped = rodLocusView.getLastSkippedBases();
|
||||
if ( nSkipped > 0 ) {
|
||||
// no sense in making the call if you don't have anything interesting to say
|
||||
GenomeLoc site = rodLocusView.getLocOneBeyondShard();
|
||||
AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileup(site), nSkipped);
|
||||
M x = locusWalker.map(null, null, ac);
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
|
|||
for (ReferenceOrderedDatum d : dbsnpList) {
|
||||
rodDbSNP dbsnpRecord = (rodDbSNP)d;
|
||||
if ( dbsnpRecord.getLocation().getStart() == context.getLocation().getStart() ) {
|
||||
VariantContext vc = VariantContextAdaptors.dbsnpToVariantContext(dbsnpRecord);
|
||||
VariantContext vc = VariantContextAdaptors.convertToVariantContext(dbsnpRecord);
|
||||
if ( vc != null ) {
|
||||
n++;
|
||||
System.out.printf("%s%n", vc);
|
||||
|
|
@ -40,7 +40,7 @@ public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
|
|||
int n = 0;
|
||||
for (ReferenceOrderedDatum d : vcfList) {
|
||||
RodVCF vcfRecord = (RodVCF)d;
|
||||
VariantContext vc = VariantContextAdaptors.vcfToVariantContext(vcfRecord);
|
||||
VariantContext vc = VariantContextAdaptors.convertToVariantContext(vcfRecord);
|
||||
if ( vc != null ) {
|
||||
n++;
|
||||
System.out.printf("%s%n", vc);
|
||||
|
|
|
|||
|
|
@ -17,10 +17,10 @@ import java.util.*;
|
|||
*/
|
||||
public class VariantContext extends AttributedObject {
|
||||
private GenomeLoc loc;
|
||||
Type type = Type.UNDETERMINED;
|
||||
private Type type = Type.UNDETERMINED;
|
||||
private Set<Allele> alleles = new HashSet<Allele>();
|
||||
private Map<String, Genotype> genotypes = new HashMap<String, Genotype>();
|
||||
Set<Object> filters = new HashSet<Object>();
|
||||
private Set<Object> filters = new HashSet<Object>();
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
|
|||
|
|
@ -2,20 +2,39 @@ package org.broadinstitute.sting.oneoffprojects.variantcontext;
|
|||
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
public class VariantContextAdaptors {
|
||||
public static VariantContext dbsnpToVariantContext(rodDbSNP dbsnp) {
|
||||
public static boolean canBeConvertedToVariantContext(Object variantContainingObject) {
|
||||
return convertToVariantContext(variantContainingObject) != null;
|
||||
}
|
||||
|
||||
public static VariantContext convertToVariantContext(Object variantContainingObject) {
|
||||
if ( variantContainingObject instanceof rodDbSNP )
|
||||
return dbsnpToVariantContext((rodDbSNP)variantContainingObject);
|
||||
else if ( variantContainingObject instanceof RodVCF )
|
||||
return vcfToVariantContext(((RodVCF)variantContainingObject).getRecord());
|
||||
else if ( variantContainingObject instanceof VCFRecord )
|
||||
return vcfToVariantContext((VCFRecord)variantContainingObject);
|
||||
else
|
||||
return null;
|
||||
//throw new IllegalArgumentException("Cannot convert object " + variantContainingObject + " of class " + variantContainingObject.getClass() + " to a variant context");
|
||||
|
||||
}
|
||||
|
||||
private static VariantContext dbsnpToVariantContext(rodDbSNP dbsnp) {
|
||||
if ( dbsnp.isSNP() || dbsnp.isIndel() || dbsnp.varType.contains("mixed") ) {
|
||||
VariantContext vc = new VariantContext(dbsnp.getLocation());
|
||||
|
||||
// add the reference allele
|
||||
if ( ! Allele.acceptableAlleleBases(dbsnp.getReference()) ) {
|
||||
System.out.printf("Excluding dbsnp record %s%n", dbsnp);
|
||||
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
@ -25,7 +44,7 @@ public class VariantContextAdaptors {
|
|||
// add all of the alt alleles
|
||||
for ( String alt : dbsnp.getAlternateAlleleList() ) {
|
||||
if ( ! Allele.acceptableAlleleBases(alt) ) {
|
||||
System.out.printf("Excluding dbsnp record %s%n", dbsnp);
|
||||
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
|
||||
return null;
|
||||
}
|
||||
vc.addAllele(new Allele(alt, false));
|
||||
|
|
@ -37,7 +56,7 @@ public class VariantContextAdaptors {
|
|||
return null; // can't handle anything else
|
||||
}
|
||||
|
||||
public static VariantContext vcfToVariantContext(RodVCF vcf) {
|
||||
private static VariantContext vcfToVariantContext(VCFRecord vcf) {
|
||||
if ( vcf.isSNP() || vcf.isIndel() ) {
|
||||
VariantContext vc = new VariantContext(vcf.getLocation());
|
||||
|
||||
|
|
|
|||
|
|
@ -2,9 +2,43 @@ package org.broadinstitute.sting.oneoffprojects.variantcontext;
|
|||
|
||||
import java.util.*;
|
||||
import org.apache.commons.jexl.*;
|
||||
import org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2.VariantEvaluator;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
|
||||
public class VariantContextUtils {
|
||||
/** */
|
||||
public static class MatchExp {
|
||||
String name;
|
||||
String expStr;
|
||||
Expression exp;
|
||||
|
||||
public MatchExp(String name, String str, Expression exp) {
|
||||
this.name = name;
|
||||
this.expStr = str;
|
||||
this.exp = exp;
|
||||
}
|
||||
}
|
||||
|
||||
public static List<MatchExp> initializeMatchExps(Map<String, String> names_and_exps) {
|
||||
List<MatchExp> exps = new ArrayList<MatchExp>();
|
||||
|
||||
for ( Map.Entry<String, String> elt : names_and_exps.entrySet() ) {
|
||||
String name = elt.getKey();
|
||||
String expStr = elt.getValue();
|
||||
|
||||
if ( name == null || expStr == null ) throw new IllegalArgumentException("Cannot create null expressions : " + name + " " + expStr);
|
||||
try {
|
||||
Expression filterExpression = ExpressionFactory.createExpression(expStr);
|
||||
exps.add(new MatchExp(name, expStr, filterExpression));
|
||||
} catch (Exception e) {
|
||||
throw new StingException("Invalid expression used (" + expStr + "). Please see the JEXL docs for correct syntax.");
|
||||
}
|
||||
}
|
||||
|
||||
return exps;
|
||||
}
|
||||
|
||||
private static final String UNIQUIFIED_SUFFIX = ".unique";
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -0,0 +1,113 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.oneoffprojects.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
|
||||
public class CountVariants extends VariantEvaluator {
|
||||
int nProcessedLoci = 0;
|
||||
int nCalledLoci = 0;
|
||||
int nVariantLoci = 0;
|
||||
int nRefLoci = 0;
|
||||
|
||||
int nSNPs = 0;
|
||||
int nInsertions = 0;
|
||||
int nDeletions = 0;
|
||||
int nComplex = 0;
|
||||
|
||||
int nNoCalls = 0;
|
||||
int nHets = 0;
|
||||
int nHomRef = 0;
|
||||
int nHomVar = 0;
|
||||
|
||||
private double rate(long n) {
|
||||
return n / (1.0 * Math.max(nProcessedLoci, 1));
|
||||
}
|
||||
|
||||
private long inverseRate(long n) {
|
||||
return n == 0 ? 0 : nProcessedLoci / Math.max(n, 1);
|
||||
}
|
||||
|
||||
private double ratio(long num, long denom) {
|
||||
return ((double)num) / (Math.max(denom, 1));
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return "Counter";
|
||||
}
|
||||
|
||||
public int getComparisonOrder() {
|
||||
return 1; // we only need to see each eval track
|
||||
}
|
||||
|
||||
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
nProcessedLoci += context.getSkippedBases() + 1;
|
||||
}
|
||||
|
||||
public void update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
//nProcessedLoci++;
|
||||
nCalledLoci++;
|
||||
|
||||
if ( vc1.isVariant() ) nVariantLoci++;
|
||||
switch ( vc1.getType() ) {
|
||||
case NO_VARIATION: nRefLoci++; break;
|
||||
case SNP: nSNPs++; break;
|
||||
case INDEL:
|
||||
if ( vc1.isInsertion() ) nInsertions++; else nDeletions++;
|
||||
break;
|
||||
case MIXED: nComplex++; break;
|
||||
}
|
||||
|
||||
for ( Genotype g : vc1.getGenotypes().values() ) {
|
||||
switch ( g.getType() ) {
|
||||
case NO_CALL: nNoCalls++; break;
|
||||
case HOM_REF: nHomRef++; break;
|
||||
case HET: nHets++; break;
|
||||
case HOM_VAR: nHomVar++; break;
|
||||
default: throw new StingException("BUG: Unexpected genotype type: " + g);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return "Counter: " + summaryLine();
|
||||
}
|
||||
|
||||
private String summaryLine() {
|
||||
return String.format("%d %d %d %d " +
|
||||
"%.2e %d " +
|
||||
"%d %d %d %d " +
|
||||
"%d %d %d " +
|
||||
"%.2e %d %.2f " +
|
||||
"%.2f %d %.2f",
|
||||
nProcessedLoci, nCalledLoci, nRefLoci, nVariantLoci,
|
||||
rate(nVariantLoci), inverseRate(nVariantLoci),
|
||||
nSNPs, nDeletions, nInsertions, nComplex,
|
||||
nHomRef, nHets, nHomVar,
|
||||
rate(nHets), inverseRate(nHets), ratio(nHets, nHomVar),
|
||||
rate(nDeletions + nInsertions), inverseRate(nDeletions + nInsertions), ratio(nDeletions, nInsertions));
|
||||
}
|
||||
|
||||
private static List<String> HEADER =
|
||||
Arrays.asList("nProcessedLoci", "nCalledLoci", "nRefLoci", "nVariantLoci",
|
||||
"variantRate", "variantRatePerBp",
|
||||
"nSNPs", "nDeletions", "nInsertions", "nComplex",
|
||||
"nHomRefGenotypes", "nHetGenotypes", "nHomVarGenotypes",
|
||||
"heterozygosity", "heterozygosityPerBp", "hetHomRatio",
|
||||
"indelRate", "indelRatePerBp", "deletionInsertionRatio");
|
||||
|
||||
// making it a table
|
||||
public List<String> getTableHeader() {
|
||||
return HEADER;
|
||||
}
|
||||
|
||||
public List<List<String>> getTableRows() {
|
||||
return Arrays.asList(Arrays.asList(summaryLine().split(" ")));
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,241 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Test routine for new VariantContext object
|
||||
*/
|
||||
public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
||||
// todo -- add doc string
|
||||
@Argument(shortName="select", doc="", required=false)
|
||||
protected String[] SELECT_STRINGS = {};
|
||||
// todo -- add doc string
|
||||
@Argument(shortName="selectName", doc="", required=false)
|
||||
protected String[] SELECT_NAMES = {};
|
||||
|
||||
@Argument(shortName="known", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false)
|
||||
protected String[] KNOWN_NAMES = {"dbsnp"};
|
||||
|
||||
|
||||
private class EvaluationGroup extends HashMap<String, Set<VariantEvaluator>> {
|
||||
// useful for typing
|
||||
}
|
||||
|
||||
// todo -- generalize to multiple contexts, one for each eval
|
||||
private HashMap<String, EvaluationGroup> contexts = new HashMap<String, EvaluationGroup>();
|
||||
private List<VariantContextUtils.MatchExp> selectExps = null;
|
||||
|
||||
public void initialize() {
|
||||
if ( SELECT_NAMES.length != SELECT_STRINGS.length )
|
||||
throw new StingException("Inconsistent number of provided filter names and expressions.");
|
||||
Map<String, String> map = new HashMap<String, String>();
|
||||
for ( int i = 0; i < SELECT_NAMES.length; i++ ) { map.put(SELECT_NAMES[i], SELECT_STRINGS[i]); }
|
||||
|
||||
selectExps = VariantContextUtils.initializeMatchExps(map);
|
||||
|
||||
// setup contexts
|
||||
// todo -- add selects
|
||||
contexts.put("eval", createEvaluationGroup());
|
||||
contexts.put("eval.filtered", createEvaluationGroup());
|
||||
}
|
||||
|
||||
private EvaluationGroup createEvaluationGroup() {
|
||||
EvaluationGroup group = new EvaluationGroup();
|
||||
|
||||
for ( String name : Arrays.asList("all", "known", "novel") ) {
|
||||
group.put(name, new HashSet<VariantEvaluator>(Arrays.asList(new CountVariants())));
|
||||
}
|
||||
|
||||
return group;
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
//System.out.printf("map at %s with %d skipped%n", context.getLocation(), context.getSkippedBases());
|
||||
|
||||
Map<String, VariantContext> allNamedVCs = getVariantContexts(context, tracker);
|
||||
Map<String, VariantContext> evalNamedVCs = getEvalVCs(allNamedVCs);
|
||||
Map<String, VariantContext> comps = getComparisonVCs(allNamedVCs);
|
||||
|
||||
for ( VariantEvaluator eval : getAllEvaluations() ) {
|
||||
eval.update0(tracker, ref, context);
|
||||
}
|
||||
|
||||
if ( evalNamedVCs.size() > 1 ) throw new StingException("VariantEval doesn't yet support for multiple independent eval tracks");
|
||||
for ( Map.Entry<String, VariantContext> evalNameVC: evalNamedVCs.entrySet() ) {
|
||||
String name = evalNameVC.getKey();
|
||||
VariantContext vc = evalNameVC.getValue();
|
||||
boolean isKnown = vcIsKnown(vc, allNamedVCs);
|
||||
|
||||
if ( vc.isFiltered() ) name = name + ".filtered";
|
||||
EvaluationGroup group = contexts.get(name);
|
||||
|
||||
for ( Set<VariantEvaluator> evaluations : Arrays.asList(group.get("all"), group.get(isKnown ? "known" : "novel")) ) {
|
||||
for ( VariantEvaluator evaluation : evaluations ) {
|
||||
switch ( evaluation.getComparisonOrder() ) {
|
||||
case 1:
|
||||
evaluation.update1(vc, tracker, ref, context);
|
||||
break;
|
||||
case 2:
|
||||
for ( VariantContext comp : comps.values() ) {
|
||||
evaluation.update2(vc, comp, tracker, ref, context);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new StingException("BUG: Unexpected evaluation order " + evaluation);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
private boolean vcIsKnown(VariantContext vc, Map<String, VariantContext> vcs) {
|
||||
for ( VariantContext known : getKnownVCs(vcs).values() ) {
|
||||
if ( known.isNotFiltered() && known.getType() == vc.getType() )
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
private Map<String, VariantContext> getEvalVCs(Map<String, VariantContext> vcs) {
|
||||
return getVCsStartingWith(vcs, false);
|
||||
}
|
||||
|
||||
private Map<String, VariantContext> getComparisonVCs(Map<String, VariantContext> vcs) {
|
||||
return getVCsStartingWith(vcs, true);
|
||||
}
|
||||
|
||||
private Map<String, VariantContext> getVCsStartingWith(Map<String, VariantContext> vcs, boolean notStartsWith) {
|
||||
Map<String, VariantContext> map = new HashMap<String, VariantContext>();
|
||||
|
||||
for ( Map.Entry<String, VariantContext> elt : vcs.entrySet() ) {
|
||||
boolean startP = elt.getKey().startsWith("eval");
|
||||
|
||||
if ( (startP && ! notStartsWith) || (!startP && notStartsWith) ) {
|
||||
map.put(elt.getKey(), elt.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
private Map<String, VariantContext> getKnownVCs(Map<String, VariantContext> vcs) {
|
||||
Map<String, VariantContext> map = new HashMap<String, VariantContext>();
|
||||
|
||||
for ( Map.Entry<String, VariantContext> elt : vcs.entrySet() ) {
|
||||
for ( String known1 : KNOWN_NAMES ) {
|
||||
if ( elt.getKey().equals(known1) ) {
|
||||
map.put(elt.getKey(), elt.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
private List<String> getAllEvaluationNames() {
|
||||
List<String> names = new ArrayList<String>();
|
||||
if ( contexts.size() == 0 ) throw new IllegalStateException("Contexts shouldn't be sized 0 when calling getAllEvaluationNames()");
|
||||
|
||||
for ( VariantEvaluator eval : contexts.values().iterator().next().get("all") ) {
|
||||
names.add(eval.getName());
|
||||
}
|
||||
|
||||
return names;
|
||||
}
|
||||
|
||||
|
||||
private Map<String, VariantContext> getVariantContexts(AlignmentContext context, RefMetaDataTracker tracker) {
|
||||
Map<String, VariantContext> map = new HashMap<String, VariantContext>();
|
||||
|
||||
if ( tracker != null ) {
|
||||
for ( RODRecordList<ReferenceOrderedDatum> rodList : tracker.getBoundRodTracks() ) {
|
||||
boolean alreadyGrabbedOne = false;
|
||||
|
||||
for ( ReferenceOrderedDatum rec : rodList.getRecords() ) {
|
||||
if ( rec.getLocation().getStart() == context.getLocation().getStart() ) {
|
||||
// ignore things that span this location but started earlier
|
||||
if ( alreadyGrabbedOne ) {
|
||||
// can't handle this situation
|
||||
;
|
||||
//logger.info(String.format("Ignore second+ events at locus %s in rod %s => rec is %s", context.getLocation(), rodList.getName(), rec));
|
||||
} else {
|
||||
VariantContext vc = VariantContextAdaptors.convertToVariantContext(rec);
|
||||
if ( vc != null ) {
|
||||
alreadyGrabbedOne = true;
|
||||
map.put(rec.getName(), vc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
public Integer reduce(Integer point, Integer sum) {
|
||||
return point + sum;
|
||||
}
|
||||
|
||||
public VariantEvaluator getEvalByName(String name, Set<VariantEvaluator> s) {
|
||||
for ( VariantEvaluator e : s )
|
||||
if ( e.getName().equals(name) )
|
||||
return e;
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
public List<VariantEvaluator> getAllEvaluations() {
|
||||
List<VariantEvaluator> l = new ArrayList<VariantEvaluator>();
|
||||
|
||||
for ( EvaluationGroup group : contexts.values() ) {
|
||||
for ( Set<VariantEvaluator> evals : group.values() ) {
|
||||
l.addAll(evals);
|
||||
}
|
||||
}
|
||||
|
||||
return l;
|
||||
}
|
||||
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
for ( String evalName : getAllEvaluationNames() ) {
|
||||
boolean first = true;
|
||||
for ( Map.Entry<String, EvaluationGroup> elt : contexts.entrySet() ) {
|
||||
String contextName = elt.getKey();
|
||||
EvaluationGroup group = elt.getValue();
|
||||
|
||||
for ( Map.Entry<String, Set<VariantEvaluator>> namedEvalGroup : group.entrySet() ) {
|
||||
String evalSubgroupName = namedEvalGroup.getKey();
|
||||
VariantEvaluator eval = getEvalByName(evalName, namedEvalGroup.getValue());
|
||||
String keyWord = contextName + "." + evalSubgroupName;
|
||||
if ( first ) {
|
||||
out.printf("%s\t%s\t%s%n", evalName, "context", Utils.join("\t", eval.getTableHeader()));
|
||||
first = false;
|
||||
}
|
||||
|
||||
for ( List<String> row : eval.getTableRows() )
|
||||
out.printf("%s\t%s\t%s%n", evalName, keyWord, Utils.join("\t", row));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2;
|
||||
|
||||
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: Jan 29, 2010
|
||||
* Time: 3:38:02 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public abstract class VariantEvaluator {
|
||||
protected boolean accumulateInterestingSites = false;
|
||||
protected boolean processedASite = false;
|
||||
protected List<VariantContext> interestingSites = new ArrayList<VariantContext>();
|
||||
|
||||
public abstract String getName();
|
||||
|
||||
// do we want to keep track of things that are interesting
|
||||
public void accumulateInterestingSites(boolean enable) { accumulateInterestingSites = enable; }
|
||||
public boolean isAccumulatingInterestingSites() { return accumulateInterestingSites; }
|
||||
public List<VariantContext> getInterestingSites() { return interestingSites; }
|
||||
protected void addInterestingSite(String why, VariantContext vc) { interestingSites.add(vc); }
|
||||
|
||||
public boolean processedAnySites() { return processedASite; }
|
||||
protected void markSiteAsProcessed() { processedASite = true; }
|
||||
|
||||
/** Should return the number of VariantContexts expected as inputs to update. Can be 1 or 2 */
|
||||
public abstract int getComparisonOrder();
|
||||
|
||||
/** called at all sites, regardless of eval context itself; useful for counting processed bases */
|
||||
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { }
|
||||
public void update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { }
|
||||
public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { }
|
||||
|
||||
public void finalize() {}
|
||||
|
||||
public abstract String toString();
|
||||
|
||||
// making it a table
|
||||
public abstract List<String> getTableHeader();
|
||||
public abstract List<List<String>> getTableRows();
|
||||
}
|
||||
|
|
@ -8,23 +8,10 @@ import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.PackageUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.varianteval.VariantAnalysis;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval.VariantEvaluation;
|
||||
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContextAdaptors;
|
||||
import org.apache.commons.jexl.ExpressionFactory;
|
||||
import org.apache.commons.jexl.Expression;
|
||||
import org.apache.commons.jexl.JexlHelper;
|
||||
import org.apache.commons.jexl.JexlContext;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -60,7 +47,7 @@ public class SNPDensity extends RefWalker<Pair<VariantContext, GenomeLoc>, SNPDe
|
|||
if (vcfList != null) {
|
||||
for (ReferenceOrderedDatum d : vcfList) {
|
||||
RodVCF vcfRecord = (RodVCF)d;
|
||||
vc = VariantContextAdaptors.vcfToVariantContext(vcfRecord);
|
||||
vc = VariantContextAdaptors.convertToVariantContext(vcfRecord);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,42 +0,0 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
public class CounterEvaluation implements VariantEvaluation {
|
||||
private int numVariants = 0;
|
||||
private int numCalls = 0;
|
||||
private int numKnown = 0;
|
||||
|
||||
public CounterEvaluation() {}
|
||||
|
||||
public void update(RodVCF variant, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
numVariants += (variant != null && variant.isSNP()) ? 1 : 0;
|
||||
numKnown += (variant != null && variant.isSNP() && !variant.getID().equals(".")) ? 1 : 0;
|
||||
numCalls++;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return "Counter";
|
||||
}
|
||||
|
||||
public List<String> getResult() {
|
||||
ArrayList<String> results = new ArrayList<String>();
|
||||
|
||||
double variantRate = ((double) numVariants)/((double) numCalls);
|
||||
double knownVariantsPct = 100.0*((double) numKnown)/((double) numVariants);
|
||||
|
||||
results.add(String.format("regionSize=%d", numCalls));
|
||||
results.add(String.format("variants=%d", numVariants));
|
||||
results.add(String.format("variantsKnown=%d", numKnown));
|
||||
results.add(String.format("knownVariantsPct=%f", knownVariantsPct));
|
||||
results.add(String.format("variantRate=1/%d", Math.round(1.0/variantRate)));
|
||||
|
||||
return results;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,153 +0,0 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.PackageUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.varianteval.VariantAnalysis;
|
||||
import org.apache.commons.jexl.ExpressionFactory;
|
||||
import org.apache.commons.jexl.Expression;
|
||||
import org.apache.commons.jexl.JexlHelper;
|
||||
import org.apache.commons.jexl.JexlContext;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
@Requires(value={},referenceMetaData=@RMD(name="eval",type=RodVCF.class))
|
||||
public class NewVariantEval extends RodWalker<Integer, Integer> {
|
||||
@Argument(fullName="filterExpression", shortName="filter", doc="Expression used with INFO fields to filter (see wiki docs for more info)", required=false)
|
||||
private String FILTER_STRING = "FILTER == false";
|
||||
|
||||
@Argument(fullName="sampleName", shortName="sample", doc="If the VCF file has multiple samples, evaluate only the specified sample", required=false)
|
||||
private String SAMPLE_NAME = null;
|
||||
|
||||
private Expression filterExpression;
|
||||
private HashSet<VariantEvaluation> evals;
|
||||
|
||||
public void initialize() {
|
||||
try {
|
||||
filterExpression = ExpressionFactory.createExpression(FILTER_STRING);
|
||||
} catch (Exception e) {
|
||||
throw new StingException("Invalid expression used (" + FILTER_STRING + "). Please see the JEXL docs for correct syntax.");
|
||||
}
|
||||
|
||||
try {
|
||||
evals = new HashSet<VariantEvaluation>();
|
||||
|
||||
List<Class<? extends VariantEvaluation>> cevals = PackageUtils.getClassesImplementingInterface(VariantEvaluation.class);
|
||||
for (Class ceval : cevals) {
|
||||
out.printf("Analysis: %s%n", ceval.getName());
|
||||
|
||||
VariantEvaluation eval = (VariantEvaluation) ceval.newInstance();
|
||||
evals.add(eval);
|
||||
}
|
||||
} catch (InstantiationException e) {
|
||||
throw new StingException(e.getMessage());
|
||||
} catch (IllegalAccessException e) {
|
||||
throw new StingException(e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
RODRecordList<ReferenceOrderedDatum> rods = tracker.getTrackData("eval", null);
|
||||
|
||||
if ( rods != null ) {
|
||||
RodVCF variant = (RodVCF) rods.getRecords().get(0);
|
||||
|
||||
Map<String, String> infoMap = new HashMap<String, String>(variant.mCurrentRecord.getInfoValues());
|
||||
infoMap.put("QUAL", String.valueOf(variant.mCurrentRecord.getQual()));
|
||||
infoMap.put("FILTER", String.valueOf(variant.mCurrentRecord.isFiltered()));
|
||||
infoMap.put("ID", String.valueOf(variant.mCurrentRecord.getID()));
|
||||
|
||||
for (String filterCode : variant.mCurrentRecord.getFilteringCodes()) {
|
||||
infoMap.put(filterCode, "1");
|
||||
}
|
||||
|
||||
JexlContext jContext = JexlHelper.createContext();
|
||||
jContext.setVars(infoMap);
|
||||
|
||||
try {
|
||||
return (Boolean) filterExpression.evaluate(jContext);
|
||||
} catch (Exception e) {
|
||||
throw new StingException(e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
RODRecordList<ReferenceOrderedDatum> rods = tracker.getTrackData("eval", null);
|
||||
RodVCF variant = (rods != null) ? (RodVCF) rods.getRecords().get(0) : null;
|
||||
|
||||
if (variant != null && SAMPLE_NAME != null && variant.hasGenotypeData()) {
|
||||
variant = selectSample(SAMPLE_NAME, variant);
|
||||
|
||||
if (variant == null) {
|
||||
throw new StingException(String.format("The sample '%s' is not present in the specified VCF", SAMPLE_NAME));
|
||||
}
|
||||
}
|
||||
|
||||
for (VariantEvaluation eval : evals) {
|
||||
eval.update(variant, tracker, ref, context);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
private RodVCF selectSample(String sample, RodVCF variant) {
|
||||
String[] samples = variant.getSampleNames();
|
||||
|
||||
for (int i = 0; i < samples.length; i++) {
|
||||
if (samples[i].equalsIgnoreCase(sample)) {
|
||||
List<VCFGenotypeRecord> genotypeRecs = new ArrayList<VCFGenotypeRecord>();
|
||||
genotypeRecs.add(variant.mCurrentRecord.getVCFGenotypeRecords().get(i));
|
||||
|
||||
variant.mCurrentRecord = new VCFRecord(variant.getReferenceForSNP(),
|
||||
variant.getLocation().getContig(),
|
||||
(int) variant.getLocation().getStart(),
|
||||
variant.getID(),
|
||||
variant.mCurrentRecord.getAlternateAlleles(),
|
||||
variant.getQual(),
|
||||
variant.getFilterString(),
|
||||
variant.getInfoValues(),
|
||||
variant.mCurrentRecord.getGenotypeFormatString(),
|
||||
genotypeRecs);
|
||||
|
||||
return variant;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer sum) {
|
||||
for (VariantEvaluation eval : evals) {
|
||||
List<String> results = eval.getResult();
|
||||
|
||||
for (String result : results) {
|
||||
out.printf("%s:%s%n", eval.getName(), result);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,65 +0,0 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.playground.utils.NamedTable;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
public class SubstitutionEvaluation implements VariantEvaluation {
|
||||
private NamedTable substitutions;
|
||||
|
||||
public SubstitutionEvaluation() {
|
||||
substitutions = new NamedTable();
|
||||
}
|
||||
|
||||
public void update(RodVCF variant, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (variant != null) {
|
||||
List<String> altAlleles = variant.getAlternateAlleleList();
|
||||
String altAllele = altAlleles.get(0);
|
||||
String refAllele = String.format("%c", BaseUtils.baseIndexToSimpleBase(ref.getBaseIndex()));
|
||||
|
||||
substitutions.increment(refAllele, altAllele);
|
||||
}
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return "Substitution";
|
||||
}
|
||||
|
||||
public List<String> getResult() {
|
||||
ArrayList<String> results = new ArrayList<String>();
|
||||
|
||||
int transitions = 0;
|
||||
int transversions = 0;
|
||||
int unknown = 0;
|
||||
|
||||
for (char rbase : BaseUtils.BASES) {
|
||||
String refAllele = String.format("%c", rbase);
|
||||
|
||||
for (char abase : BaseUtils.BASES) {
|
||||
String altAllele = String.format("%c", abase);
|
||||
|
||||
if (BaseUtils.isTransition((byte)rbase, (byte)abase)) {
|
||||
transitions += substitutions.get(refAllele, altAllele);
|
||||
} else if (BaseUtils.isTransversion((byte)rbase, (byte)abase)) {
|
||||
transversions += substitutions.get(refAllele, altAllele);
|
||||
} else {
|
||||
unknown += substitutions.get(refAllele, altAllele);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
results.add(String.format("%n%s", substitutions.toString()));
|
||||
results.add(String.format("transitions=%d", transitions));
|
||||
results.add(String.format("transversions=%d", transversions));
|
||||
results.add(String.format("unknown=%d", unknown));
|
||||
results.add(String.format("titv=%f", ((double) transitions)/((double) transversions)));
|
||||
|
||||
return results;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public interface VariantEvaluation {
|
||||
public void update(RodVCF variant, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context);
|
||||
|
||||
//public Integer map(
|
||||
|
||||
public String getName();
|
||||
|
||||
public List<String> getResult();
|
||||
}
|
||||
|
|
@ -257,6 +257,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
//System.out.printf("Tracker at %s is %s, ref is %s, skip is %d mapped is %d%n", context.getLocation(), tracker, ref, context.getSkippedBases(), nMappedSites);
|
||||
nMappedSites += context.getSkippedBases();
|
||||
|
||||
//System.out.printf("Tracker at %s is %s, ref is %s%n", context.getLocation(), tracker, ref);
|
||||
|
|
|
|||
Loading…
Reference in New Issue