VariantEval2 test framework implemented; Kiran is experimenting with the system. Not for use by anyone else. VariantContext appears to work well; I'll release it next week for general use following docs of the functions. Removing newvarianteval and other classes to avoid any future confusion. Update to TraverseLoci and RodLocusView to simplify a few functions and to correct some minor errors. All tests pass without modification.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2748 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
236764b249
commit
3d45457595
|
|
@ -159,17 +159,9 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
private long getSkippedBases( GenomeLoc currentPos ) {
|
private long getSkippedBases( GenomeLoc currentPos ) {
|
||||||
long skippedBases = 0;
|
// the minus - is because if lastLoc == null, you haven't yet seen anything in this interval, so it should also be counted as skipped
|
||||||
|
Long compStop = lastLoc == null ? shard.getGenomeLocs().get(0).getStart() - 1 : lastLoc.getStop();
|
||||||
if ( lastLoc == null ) {
|
long skippedBases = currentPos.getStart() - compStop - 1;
|
||||||
// special case -- we're at the start
|
|
||||||
//System.out.printf("Cur=%s, shard=%s%n", currentPos, shard.getGenomeLoc());
|
|
||||||
GenomeLoc firstLoc = shard.getGenomeLocs().get(0);
|
|
||||||
skippedBases = currentPos.getStart() - firstLoc.getStart();
|
|
||||||
} else {
|
|
||||||
//System.out.printf("Cur=%s, last=%s%n", currentPos, lastLoc);
|
|
||||||
skippedBases = currentPos.minus(lastLoc) - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( skippedBases < -1 ) { // minus 1 value is ok
|
if ( skippedBases < -1 ) { // minus 1 value is ok
|
||||||
throw new RuntimeException(String.format("BUG: skipped bases=%d is < 0: cur=%s vs. last=%s, shard=%s",
|
throw new RuntimeException(String.format("BUG: skipped bases=%d is < 0: cur=%s vs. last=%s, shard=%s",
|
||||||
|
|
|
||||||
|
|
@ -109,7 +109,6 @@ public class TraverseLoci extends TraversalEngine {
|
||||||
RodLocusView rodLocusView = (RodLocusView)locusView;
|
RodLocusView rodLocusView = (RodLocusView)locusView;
|
||||||
long nSkipped = rodLocusView.getLastSkippedBases();
|
long nSkipped = rodLocusView.getLastSkippedBases();
|
||||||
if ( nSkipped > 0 ) {
|
if ( nSkipped > 0 ) {
|
||||||
// no sense in making the call if you don't have anything interesting to say
|
|
||||||
GenomeLoc site = rodLocusView.getLocOneBeyondShard();
|
GenomeLoc site = rodLocusView.getLocOneBeyondShard();
|
||||||
AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileup(site), nSkipped);
|
AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileup(site), nSkipped);
|
||||||
M x = locusWalker.map(null, null, ac);
|
M x = locusWalker.map(null, null, ac);
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
|
||||||
for (ReferenceOrderedDatum d : dbsnpList) {
|
for (ReferenceOrderedDatum d : dbsnpList) {
|
||||||
rodDbSNP dbsnpRecord = (rodDbSNP)d;
|
rodDbSNP dbsnpRecord = (rodDbSNP)d;
|
||||||
if ( dbsnpRecord.getLocation().getStart() == context.getLocation().getStart() ) {
|
if ( dbsnpRecord.getLocation().getStart() == context.getLocation().getStart() ) {
|
||||||
VariantContext vc = VariantContextAdaptors.dbsnpToVariantContext(dbsnpRecord);
|
VariantContext vc = VariantContextAdaptors.convertToVariantContext(dbsnpRecord);
|
||||||
if ( vc != null ) {
|
if ( vc != null ) {
|
||||||
n++;
|
n++;
|
||||||
System.out.printf("%s%n", vc);
|
System.out.printf("%s%n", vc);
|
||||||
|
|
@ -40,7 +40,7 @@ public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
|
||||||
int n = 0;
|
int n = 0;
|
||||||
for (ReferenceOrderedDatum d : vcfList) {
|
for (ReferenceOrderedDatum d : vcfList) {
|
||||||
RodVCF vcfRecord = (RodVCF)d;
|
RodVCF vcfRecord = (RodVCF)d;
|
||||||
VariantContext vc = VariantContextAdaptors.vcfToVariantContext(vcfRecord);
|
VariantContext vc = VariantContextAdaptors.convertToVariantContext(vcfRecord);
|
||||||
if ( vc != null ) {
|
if ( vc != null ) {
|
||||||
n++;
|
n++;
|
||||||
System.out.printf("%s%n", vc);
|
System.out.printf("%s%n", vc);
|
||||||
|
|
|
||||||
|
|
@ -17,10 +17,10 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
public class VariantContext extends AttributedObject {
|
public class VariantContext extends AttributedObject {
|
||||||
private GenomeLoc loc;
|
private GenomeLoc loc;
|
||||||
Type type = Type.UNDETERMINED;
|
private Type type = Type.UNDETERMINED;
|
||||||
private Set<Allele> alleles = new HashSet<Allele>();
|
private Set<Allele> alleles = new HashSet<Allele>();
|
||||||
private Map<String, Genotype> genotypes = new HashMap<String, Genotype>();
|
private Map<String, Genotype> genotypes = new HashMap<String, Genotype>();
|
||||||
Set<Object> filters = new HashSet<Object>();
|
private Set<Object> filters = new HashSet<Object>();
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
|
||||||
|
|
@ -2,20 +2,39 @@ package org.broadinstitute.sting.oneoffprojects.variantcontext;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding;
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
public class VariantContextAdaptors {
|
public class VariantContextAdaptors {
|
||||||
public static VariantContext dbsnpToVariantContext(rodDbSNP dbsnp) {
|
public static boolean canBeConvertedToVariantContext(Object variantContainingObject) {
|
||||||
|
return convertToVariantContext(variantContainingObject) != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static VariantContext convertToVariantContext(Object variantContainingObject) {
|
||||||
|
if ( variantContainingObject instanceof rodDbSNP )
|
||||||
|
return dbsnpToVariantContext((rodDbSNP)variantContainingObject);
|
||||||
|
else if ( variantContainingObject instanceof RodVCF )
|
||||||
|
return vcfToVariantContext(((RodVCF)variantContainingObject).getRecord());
|
||||||
|
else if ( variantContainingObject instanceof VCFRecord )
|
||||||
|
return vcfToVariantContext((VCFRecord)variantContainingObject);
|
||||||
|
else
|
||||||
|
return null;
|
||||||
|
//throw new IllegalArgumentException("Cannot convert object " + variantContainingObject + " of class " + variantContainingObject.getClass() + " to a variant context");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static VariantContext dbsnpToVariantContext(rodDbSNP dbsnp) {
|
||||||
if ( dbsnp.isSNP() || dbsnp.isIndel() || dbsnp.varType.contains("mixed") ) {
|
if ( dbsnp.isSNP() || dbsnp.isIndel() || dbsnp.varType.contains("mixed") ) {
|
||||||
VariantContext vc = new VariantContext(dbsnp.getLocation());
|
VariantContext vc = new VariantContext(dbsnp.getLocation());
|
||||||
|
|
||||||
// add the reference allele
|
// add the reference allele
|
||||||
if ( ! Allele.acceptableAlleleBases(dbsnp.getReference()) ) {
|
if ( ! Allele.acceptableAlleleBases(dbsnp.getReference()) ) {
|
||||||
System.out.printf("Excluding dbsnp record %s%n", dbsnp);
|
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -25,7 +44,7 @@ public class VariantContextAdaptors {
|
||||||
// add all of the alt alleles
|
// add all of the alt alleles
|
||||||
for ( String alt : dbsnp.getAlternateAlleleList() ) {
|
for ( String alt : dbsnp.getAlternateAlleleList() ) {
|
||||||
if ( ! Allele.acceptableAlleleBases(alt) ) {
|
if ( ! Allele.acceptableAlleleBases(alt) ) {
|
||||||
System.out.printf("Excluding dbsnp record %s%n", dbsnp);
|
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
vc.addAllele(new Allele(alt, false));
|
vc.addAllele(new Allele(alt, false));
|
||||||
|
|
@ -37,7 +56,7 @@ public class VariantContextAdaptors {
|
||||||
return null; // can't handle anything else
|
return null; // can't handle anything else
|
||||||
}
|
}
|
||||||
|
|
||||||
public static VariantContext vcfToVariantContext(RodVCF vcf) {
|
private static VariantContext vcfToVariantContext(VCFRecord vcf) {
|
||||||
if ( vcf.isSNP() || vcf.isIndel() ) {
|
if ( vcf.isSNP() || vcf.isIndel() ) {
|
||||||
VariantContext vc = new VariantContext(vcf.getLocation());
|
VariantContext vc = new VariantContext(vcf.getLocation());
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,9 +2,43 @@ package org.broadinstitute.sting.oneoffprojects.variantcontext;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import org.apache.commons.jexl.*;
|
import org.apache.commons.jexl.*;
|
||||||
|
import org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2.VariantEvaluator;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
|
|
||||||
public class VariantContextUtils {
|
public class VariantContextUtils {
|
||||||
|
/** */
|
||||||
|
public static class MatchExp {
|
||||||
|
String name;
|
||||||
|
String expStr;
|
||||||
|
Expression exp;
|
||||||
|
|
||||||
|
public MatchExp(String name, String str, Expression exp) {
|
||||||
|
this.name = name;
|
||||||
|
this.expStr = str;
|
||||||
|
this.exp = exp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static List<MatchExp> initializeMatchExps(Map<String, String> names_and_exps) {
|
||||||
|
List<MatchExp> exps = new ArrayList<MatchExp>();
|
||||||
|
|
||||||
|
for ( Map.Entry<String, String> elt : names_and_exps.entrySet() ) {
|
||||||
|
String name = elt.getKey();
|
||||||
|
String expStr = elt.getValue();
|
||||||
|
|
||||||
|
if ( name == null || expStr == null ) throw new IllegalArgumentException("Cannot create null expressions : " + name + " " + expStr);
|
||||||
|
try {
|
||||||
|
Expression filterExpression = ExpressionFactory.createExpression(expStr);
|
||||||
|
exps.add(new MatchExp(name, expStr, filterExpression));
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new StingException("Invalid expression used (" + expStr + "). Please see the JEXL docs for correct syntax.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return exps;
|
||||||
|
}
|
||||||
|
|
||||||
private static final String UNIQUIFIED_SUFFIX = ".unique";
|
private static final String UNIQUIFIED_SUFFIX = ".unique";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,113 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.oneoffprojects.variantcontext.Genotype;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
public class CountVariants extends VariantEvaluator {
|
||||||
|
int nProcessedLoci = 0;
|
||||||
|
int nCalledLoci = 0;
|
||||||
|
int nVariantLoci = 0;
|
||||||
|
int nRefLoci = 0;
|
||||||
|
|
||||||
|
int nSNPs = 0;
|
||||||
|
int nInsertions = 0;
|
||||||
|
int nDeletions = 0;
|
||||||
|
int nComplex = 0;
|
||||||
|
|
||||||
|
int nNoCalls = 0;
|
||||||
|
int nHets = 0;
|
||||||
|
int nHomRef = 0;
|
||||||
|
int nHomVar = 0;
|
||||||
|
|
||||||
|
private double rate(long n) {
|
||||||
|
return n / (1.0 * Math.max(nProcessedLoci, 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
private long inverseRate(long n) {
|
||||||
|
return n == 0 ? 0 : nProcessedLoci / Math.max(n, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
private double ratio(long num, long denom) {
|
||||||
|
return ((double)num) / (Math.max(denom, 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "Counter";
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getComparisonOrder() {
|
||||||
|
return 1; // we only need to see each eval track
|
||||||
|
}
|
||||||
|
|
||||||
|
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
nProcessedLoci += context.getSkippedBases() + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
//nProcessedLoci++;
|
||||||
|
nCalledLoci++;
|
||||||
|
|
||||||
|
if ( vc1.isVariant() ) nVariantLoci++;
|
||||||
|
switch ( vc1.getType() ) {
|
||||||
|
case NO_VARIATION: nRefLoci++; break;
|
||||||
|
case SNP: nSNPs++; break;
|
||||||
|
case INDEL:
|
||||||
|
if ( vc1.isInsertion() ) nInsertions++; else nDeletions++;
|
||||||
|
break;
|
||||||
|
case MIXED: nComplex++; break;
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( Genotype g : vc1.getGenotypes().values() ) {
|
||||||
|
switch ( g.getType() ) {
|
||||||
|
case NO_CALL: nNoCalls++; break;
|
||||||
|
case HOM_REF: nHomRef++; break;
|
||||||
|
case HET: nHets++; break;
|
||||||
|
case HOM_VAR: nHomVar++; break;
|
||||||
|
default: throw new StingException("BUG: Unexpected genotype type: " + g);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return "Counter: " + summaryLine();
|
||||||
|
}
|
||||||
|
|
||||||
|
private String summaryLine() {
|
||||||
|
return String.format("%d %d %d %d " +
|
||||||
|
"%.2e %d " +
|
||||||
|
"%d %d %d %d " +
|
||||||
|
"%d %d %d " +
|
||||||
|
"%.2e %d %.2f " +
|
||||||
|
"%.2f %d %.2f",
|
||||||
|
nProcessedLoci, nCalledLoci, nRefLoci, nVariantLoci,
|
||||||
|
rate(nVariantLoci), inverseRate(nVariantLoci),
|
||||||
|
nSNPs, nDeletions, nInsertions, nComplex,
|
||||||
|
nHomRef, nHets, nHomVar,
|
||||||
|
rate(nHets), inverseRate(nHets), ratio(nHets, nHomVar),
|
||||||
|
rate(nDeletions + nInsertions), inverseRate(nDeletions + nInsertions), ratio(nDeletions, nInsertions));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<String> HEADER =
|
||||||
|
Arrays.asList("nProcessedLoci", "nCalledLoci", "nRefLoci", "nVariantLoci",
|
||||||
|
"variantRate", "variantRatePerBp",
|
||||||
|
"nSNPs", "nDeletions", "nInsertions", "nComplex",
|
||||||
|
"nHomRefGenotypes", "nHetGenotypes", "nHomVarGenotypes",
|
||||||
|
"heterozygosity", "heterozygosityPerBp", "hetHomRatio",
|
||||||
|
"indelRate", "indelRatePerBp", "deletionInsertionRatio");
|
||||||
|
|
||||||
|
// making it a table
|
||||||
|
public List<String> getTableHeader() {
|
||||||
|
return HEADER;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<List<String>> getTableRows() {
|
||||||
|
return Arrays.asList(Arrays.asList(summaryLine().split(" ")));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,241 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
|
import org.broadinstitute.sting.utils.*;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContextAdaptors;
|
||||||
|
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContextUtils;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test routine for new VariantContext object
|
||||||
|
*/
|
||||||
|
public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
||||||
|
// todo -- add doc string
|
||||||
|
@Argument(shortName="select", doc="", required=false)
|
||||||
|
protected String[] SELECT_STRINGS = {};
|
||||||
|
// todo -- add doc string
|
||||||
|
@Argument(shortName="selectName", doc="", required=false)
|
||||||
|
protected String[] SELECT_NAMES = {};
|
||||||
|
|
||||||
|
@Argument(shortName="known", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false)
|
||||||
|
protected String[] KNOWN_NAMES = {"dbsnp"};
|
||||||
|
|
||||||
|
|
||||||
|
private class EvaluationGroup extends HashMap<String, Set<VariantEvaluator>> {
|
||||||
|
// useful for typing
|
||||||
|
}
|
||||||
|
|
||||||
|
// todo -- generalize to multiple contexts, one for each eval
|
||||||
|
private HashMap<String, EvaluationGroup> contexts = new HashMap<String, EvaluationGroup>();
|
||||||
|
private List<VariantContextUtils.MatchExp> selectExps = null;
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
if ( SELECT_NAMES.length != SELECT_STRINGS.length )
|
||||||
|
throw new StingException("Inconsistent number of provided filter names and expressions.");
|
||||||
|
Map<String, String> map = new HashMap<String, String>();
|
||||||
|
for ( int i = 0; i < SELECT_NAMES.length; i++ ) { map.put(SELECT_NAMES[i], SELECT_STRINGS[i]); }
|
||||||
|
|
||||||
|
selectExps = VariantContextUtils.initializeMatchExps(map);
|
||||||
|
|
||||||
|
// setup contexts
|
||||||
|
// todo -- add selects
|
||||||
|
contexts.put("eval", createEvaluationGroup());
|
||||||
|
contexts.put("eval.filtered", createEvaluationGroup());
|
||||||
|
}
|
||||||
|
|
||||||
|
private EvaluationGroup createEvaluationGroup() {
|
||||||
|
EvaluationGroup group = new EvaluationGroup();
|
||||||
|
|
||||||
|
for ( String name : Arrays.asList("all", "known", "novel") ) {
|
||||||
|
group.put(name, new HashSet<VariantEvaluator>(Arrays.asList(new CountVariants())));
|
||||||
|
}
|
||||||
|
|
||||||
|
return group;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
//System.out.printf("map at %s with %d skipped%n", context.getLocation(), context.getSkippedBases());
|
||||||
|
|
||||||
|
Map<String, VariantContext> allNamedVCs = getVariantContexts(context, tracker);
|
||||||
|
Map<String, VariantContext> evalNamedVCs = getEvalVCs(allNamedVCs);
|
||||||
|
Map<String, VariantContext> comps = getComparisonVCs(allNamedVCs);
|
||||||
|
|
||||||
|
for ( VariantEvaluator eval : getAllEvaluations() ) {
|
||||||
|
eval.update0(tracker, ref, context);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( evalNamedVCs.size() > 1 ) throw new StingException("VariantEval doesn't yet support for multiple independent eval tracks");
|
||||||
|
for ( Map.Entry<String, VariantContext> evalNameVC: evalNamedVCs.entrySet() ) {
|
||||||
|
String name = evalNameVC.getKey();
|
||||||
|
VariantContext vc = evalNameVC.getValue();
|
||||||
|
boolean isKnown = vcIsKnown(vc, allNamedVCs);
|
||||||
|
|
||||||
|
if ( vc.isFiltered() ) name = name + ".filtered";
|
||||||
|
EvaluationGroup group = contexts.get(name);
|
||||||
|
|
||||||
|
for ( Set<VariantEvaluator> evaluations : Arrays.asList(group.get("all"), group.get(isKnown ? "known" : "novel")) ) {
|
||||||
|
for ( VariantEvaluator evaluation : evaluations ) {
|
||||||
|
switch ( evaluation.getComparisonOrder() ) {
|
||||||
|
case 1:
|
||||||
|
evaluation.update1(vc, tracker, ref, context);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
for ( VariantContext comp : comps.values() ) {
|
||||||
|
evaluation.update2(vc, comp, tracker, ref, context);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new StingException("BUG: Unexpected evaluation order " + evaluation);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean vcIsKnown(VariantContext vc, Map<String, VariantContext> vcs) {
|
||||||
|
for ( VariantContext known : getKnownVCs(vcs).values() ) {
|
||||||
|
if ( known.isNotFiltered() && known.getType() == vc.getType() )
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private Map<String, VariantContext> getEvalVCs(Map<String, VariantContext> vcs) {
|
||||||
|
return getVCsStartingWith(vcs, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, VariantContext> getComparisonVCs(Map<String, VariantContext> vcs) {
|
||||||
|
return getVCsStartingWith(vcs, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, VariantContext> getVCsStartingWith(Map<String, VariantContext> vcs, boolean notStartsWith) {
|
||||||
|
Map<String, VariantContext> map = new HashMap<String, VariantContext>();
|
||||||
|
|
||||||
|
for ( Map.Entry<String, VariantContext> elt : vcs.entrySet() ) {
|
||||||
|
boolean startP = elt.getKey().startsWith("eval");
|
||||||
|
|
||||||
|
if ( (startP && ! notStartsWith) || (!startP && notStartsWith) ) {
|
||||||
|
map.put(elt.getKey(), elt.getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, VariantContext> getKnownVCs(Map<String, VariantContext> vcs) {
|
||||||
|
Map<String, VariantContext> map = new HashMap<String, VariantContext>();
|
||||||
|
|
||||||
|
for ( Map.Entry<String, VariantContext> elt : vcs.entrySet() ) {
|
||||||
|
for ( String known1 : KNOWN_NAMES ) {
|
||||||
|
if ( elt.getKey().equals(known1) ) {
|
||||||
|
map.put(elt.getKey(), elt.getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> getAllEvaluationNames() {
|
||||||
|
List<String> names = new ArrayList<String>();
|
||||||
|
if ( contexts.size() == 0 ) throw new IllegalStateException("Contexts shouldn't be sized 0 when calling getAllEvaluationNames()");
|
||||||
|
|
||||||
|
for ( VariantEvaluator eval : contexts.values().iterator().next().get("all") ) {
|
||||||
|
names.add(eval.getName());
|
||||||
|
}
|
||||||
|
|
||||||
|
return names;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private Map<String, VariantContext> getVariantContexts(AlignmentContext context, RefMetaDataTracker tracker) {
|
||||||
|
Map<String, VariantContext> map = new HashMap<String, VariantContext>();
|
||||||
|
|
||||||
|
if ( tracker != null ) {
|
||||||
|
for ( RODRecordList<ReferenceOrderedDatum> rodList : tracker.getBoundRodTracks() ) {
|
||||||
|
boolean alreadyGrabbedOne = false;
|
||||||
|
|
||||||
|
for ( ReferenceOrderedDatum rec : rodList.getRecords() ) {
|
||||||
|
if ( rec.getLocation().getStart() == context.getLocation().getStart() ) {
|
||||||
|
// ignore things that span this location but started earlier
|
||||||
|
if ( alreadyGrabbedOne ) {
|
||||||
|
// can't handle this situation
|
||||||
|
;
|
||||||
|
//logger.info(String.format("Ignore second+ events at locus %s in rod %s => rec is %s", context.getLocation(), rodList.getName(), rec));
|
||||||
|
} else {
|
||||||
|
VariantContext vc = VariantContextAdaptors.convertToVariantContext(rec);
|
||||||
|
if ( vc != null ) {
|
||||||
|
alreadyGrabbedOne = true;
|
||||||
|
map.put(rec.getName(), vc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduceInit() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduce(Integer point, Integer sum) {
|
||||||
|
return point + sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
public VariantEvaluator getEvalByName(String name, Set<VariantEvaluator> s) {
|
||||||
|
for ( VariantEvaluator e : s )
|
||||||
|
if ( e.getName().equals(name) )
|
||||||
|
return e;
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public List<VariantEvaluator> getAllEvaluations() {
|
||||||
|
List<VariantEvaluator> l = new ArrayList<VariantEvaluator>();
|
||||||
|
|
||||||
|
for ( EvaluationGroup group : contexts.values() ) {
|
||||||
|
for ( Set<VariantEvaluator> evals : group.values() ) {
|
||||||
|
l.addAll(evals);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return l;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void onTraversalDone(Integer result) {
|
||||||
|
for ( String evalName : getAllEvaluationNames() ) {
|
||||||
|
boolean first = true;
|
||||||
|
for ( Map.Entry<String, EvaluationGroup> elt : contexts.entrySet() ) {
|
||||||
|
String contextName = elt.getKey();
|
||||||
|
EvaluationGroup group = elt.getValue();
|
||||||
|
|
||||||
|
for ( Map.Entry<String, Set<VariantEvaluator>> namedEvalGroup : group.entrySet() ) {
|
||||||
|
String evalSubgroupName = namedEvalGroup.getKey();
|
||||||
|
VariantEvaluator eval = getEvalByName(evalName, namedEvalGroup.getValue());
|
||||||
|
String keyWord = contextName + "." + evalSubgroupName;
|
||||||
|
if ( first ) {
|
||||||
|
out.printf("%s\t%s\t%s%n", evalName, "context", Utils.join("\t", eval.getTableHeader()));
|
||||||
|
first = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( List<String> row : eval.getTableRows() )
|
||||||
|
out.printf("%s\t%s\t%s%n", evalName, keyWord, Utils.join("\t", row));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,49 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: depristo
|
||||||
|
* Date: Jan 29, 2010
|
||||||
|
* Time: 3:38:02 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public abstract class VariantEvaluator {
|
||||||
|
protected boolean accumulateInterestingSites = false;
|
||||||
|
protected boolean processedASite = false;
|
||||||
|
protected List<VariantContext> interestingSites = new ArrayList<VariantContext>();
|
||||||
|
|
||||||
|
public abstract String getName();
|
||||||
|
|
||||||
|
// do we want to keep track of things that are interesting
|
||||||
|
public void accumulateInterestingSites(boolean enable) { accumulateInterestingSites = enable; }
|
||||||
|
public boolean isAccumulatingInterestingSites() { return accumulateInterestingSites; }
|
||||||
|
public List<VariantContext> getInterestingSites() { return interestingSites; }
|
||||||
|
protected void addInterestingSite(String why, VariantContext vc) { interestingSites.add(vc); }
|
||||||
|
|
||||||
|
public boolean processedAnySites() { return processedASite; }
|
||||||
|
protected void markSiteAsProcessed() { processedASite = true; }
|
||||||
|
|
||||||
|
/** Should return the number of VariantContexts expected as inputs to update. Can be 1 or 2 */
|
||||||
|
public abstract int getComparisonOrder();
|
||||||
|
|
||||||
|
/** called at all sites, regardless of eval context itself; useful for counting processed bases */
|
||||||
|
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { }
|
||||||
|
public void update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { }
|
||||||
|
public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { }
|
||||||
|
|
||||||
|
public void finalize() {}
|
||||||
|
|
||||||
|
public abstract String toString();
|
||||||
|
|
||||||
|
// making it a table
|
||||||
|
public abstract List<String> getTableHeader();
|
||||||
|
public abstract List<List<String>> getTableRows();
|
||||||
|
}
|
||||||
|
|
@ -8,23 +8,10 @@ import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
|
||||||
import org.broadinstitute.sting.utils.PackageUtils;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.Pair;
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
|
||||||
import org.broadinstitute.sting.playground.gatk.walkers.varianteval.VariantAnalysis;
|
|
||||||
import org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval.VariantEvaluation;
|
|
||||||
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext;
|
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContextAdaptors;
|
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContextAdaptors;
|
||||||
import org.apache.commons.jexl.ExpressionFactory;
|
|
||||||
import org.apache.commons.jexl.Expression;
|
|
||||||
import org.apache.commons.jexl.JexlHelper;
|
|
||||||
import org.apache.commons.jexl.JexlContext;
|
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -60,7 +47,7 @@ public class SNPDensity extends RefWalker<Pair<VariantContext, GenomeLoc>, SNPDe
|
||||||
if (vcfList != null) {
|
if (vcfList != null) {
|
||||||
for (ReferenceOrderedDatum d : vcfList) {
|
for (ReferenceOrderedDatum d : vcfList) {
|
||||||
RodVCF vcfRecord = (RodVCF)d;
|
RodVCF vcfRecord = (RodVCF)d;
|
||||||
vc = VariantContextAdaptors.vcfToVariantContext(vcfRecord);
|
vc = VariantContextAdaptors.convertToVariantContext(vcfRecord);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,42 +0,0 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
|
|
||||||
public class CounterEvaluation implements VariantEvaluation {
|
|
||||||
private int numVariants = 0;
|
|
||||||
private int numCalls = 0;
|
|
||||||
private int numKnown = 0;
|
|
||||||
|
|
||||||
public CounterEvaluation() {}
|
|
||||||
|
|
||||||
public void update(RodVCF variant, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
|
||||||
numVariants += (variant != null && variant.isSNP()) ? 1 : 0;
|
|
||||||
numKnown += (variant != null && variant.isSNP() && !variant.getID().equals(".")) ? 1 : 0;
|
|
||||||
numCalls++;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getName() {
|
|
||||||
return "Counter";
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<String> getResult() {
|
|
||||||
ArrayList<String> results = new ArrayList<String>();
|
|
||||||
|
|
||||||
double variantRate = ((double) numVariants)/((double) numCalls);
|
|
||||||
double knownVariantsPct = 100.0*((double) numKnown)/((double) numVariants);
|
|
||||||
|
|
||||||
results.add(String.format("regionSize=%d", numCalls));
|
|
||||||
results.add(String.format("variants=%d", numVariants));
|
|
||||||
results.add(String.format("variantsKnown=%d", numKnown));
|
|
||||||
results.add(String.format("knownVariantsPct=%f", knownVariantsPct));
|
|
||||||
results.add(String.format("variantRate=1/%d", Math.round(1.0/variantRate)));
|
|
||||||
|
|
||||||
return results;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,153 +0,0 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.RMD;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
|
||||||
import org.broadinstitute.sting.utils.PackageUtils;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
|
||||||
import org.broadinstitute.sting.playground.gatk.walkers.varianteval.VariantAnalysis;
|
|
||||||
import org.apache.commons.jexl.ExpressionFactory;
|
|
||||||
import org.apache.commons.jexl.Expression;
|
|
||||||
import org.apache.commons.jexl.JexlHelper;
|
|
||||||
import org.apache.commons.jexl.JexlContext;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
@Requires(value={},referenceMetaData=@RMD(name="eval",type=RodVCF.class))
|
|
||||||
public class NewVariantEval extends RodWalker<Integer, Integer> {
|
|
||||||
@Argument(fullName="filterExpression", shortName="filter", doc="Expression used with INFO fields to filter (see wiki docs for more info)", required=false)
|
|
||||||
private String FILTER_STRING = "FILTER == false";
|
|
||||||
|
|
||||||
@Argument(fullName="sampleName", shortName="sample", doc="If the VCF file has multiple samples, evaluate only the specified sample", required=false)
|
|
||||||
private String SAMPLE_NAME = null;
|
|
||||||
|
|
||||||
private Expression filterExpression;
|
|
||||||
private HashSet<VariantEvaluation> evals;
|
|
||||||
|
|
||||||
public void initialize() {
|
|
||||||
try {
|
|
||||||
filterExpression = ExpressionFactory.createExpression(FILTER_STRING);
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new StingException("Invalid expression used (" + FILTER_STRING + "). Please see the JEXL docs for correct syntax.");
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
evals = new HashSet<VariantEvaluation>();
|
|
||||||
|
|
||||||
List<Class<? extends VariantEvaluation>> cevals = PackageUtils.getClassesImplementingInterface(VariantEvaluation.class);
|
|
||||||
for (Class ceval : cevals) {
|
|
||||||
out.printf("Analysis: %s%n", ceval.getName());
|
|
||||||
|
|
||||||
VariantEvaluation eval = (VariantEvaluation) ceval.newInstance();
|
|
||||||
evals.add(eval);
|
|
||||||
}
|
|
||||||
} catch (InstantiationException e) {
|
|
||||||
throw new StingException(e.getMessage());
|
|
||||||
} catch (IllegalAccessException e) {
|
|
||||||
throw new StingException(e.getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
|
||||||
RODRecordList<ReferenceOrderedDatum> rods = tracker.getTrackData("eval", null);
|
|
||||||
|
|
||||||
if ( rods != null ) {
|
|
||||||
RodVCF variant = (RodVCF) rods.getRecords().get(0);
|
|
||||||
|
|
||||||
Map<String, String> infoMap = new HashMap<String, String>(variant.mCurrentRecord.getInfoValues());
|
|
||||||
infoMap.put("QUAL", String.valueOf(variant.mCurrentRecord.getQual()));
|
|
||||||
infoMap.put("FILTER", String.valueOf(variant.mCurrentRecord.isFiltered()));
|
|
||||||
infoMap.put("ID", String.valueOf(variant.mCurrentRecord.getID()));
|
|
||||||
|
|
||||||
for (String filterCode : variant.mCurrentRecord.getFilteringCodes()) {
|
|
||||||
infoMap.put(filterCode, "1");
|
|
||||||
}
|
|
||||||
|
|
||||||
JexlContext jContext = JexlHelper.createContext();
|
|
||||||
jContext.setVars(infoMap);
|
|
||||||
|
|
||||||
try {
|
|
||||||
return (Boolean) filterExpression.evaluate(jContext);
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new StingException(e.getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
|
||||||
RODRecordList<ReferenceOrderedDatum> rods = tracker.getTrackData("eval", null);
|
|
||||||
RodVCF variant = (rods != null) ? (RodVCF) rods.getRecords().get(0) : null;
|
|
||||||
|
|
||||||
if (variant != null && SAMPLE_NAME != null && variant.hasGenotypeData()) {
|
|
||||||
variant = selectSample(SAMPLE_NAME, variant);
|
|
||||||
|
|
||||||
if (variant == null) {
|
|
||||||
throw new StingException(String.format("The sample '%s' is not present in the specified VCF", SAMPLE_NAME));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (VariantEvaluation eval : evals) {
|
|
||||||
eval.update(variant, tracker, ref, context);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
private RodVCF selectSample(String sample, RodVCF variant) {
|
|
||||||
String[] samples = variant.getSampleNames();
|
|
||||||
|
|
||||||
for (int i = 0; i < samples.length; i++) {
|
|
||||||
if (samples[i].equalsIgnoreCase(sample)) {
|
|
||||||
List<VCFGenotypeRecord> genotypeRecs = new ArrayList<VCFGenotypeRecord>();
|
|
||||||
genotypeRecs.add(variant.mCurrentRecord.getVCFGenotypeRecords().get(i));
|
|
||||||
|
|
||||||
variant.mCurrentRecord = new VCFRecord(variant.getReferenceForSNP(),
|
|
||||||
variant.getLocation().getContig(),
|
|
||||||
(int) variant.getLocation().getStart(),
|
|
||||||
variant.getID(),
|
|
||||||
variant.mCurrentRecord.getAlternateAlleles(),
|
|
||||||
variant.getQual(),
|
|
||||||
variant.getFilterString(),
|
|
||||||
variant.getInfoValues(),
|
|
||||||
variant.mCurrentRecord.getGenotypeFormatString(),
|
|
||||||
genotypeRecs);
|
|
||||||
|
|
||||||
return variant;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Integer reduceInit() {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Integer reduce(Integer value, Integer sum) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void onTraversalDone(Integer sum) {
|
|
||||||
for (VariantEvaluation eval : evals) {
|
|
||||||
List<String> results = eval.getResult();
|
|
||||||
|
|
||||||
for (String result : results) {
|
|
||||||
out.printf("%s:%s%n", eval.getName(), result);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,65 +0,0 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.playground.utils.NamedTable;
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
|
|
||||||
public class SubstitutionEvaluation implements VariantEvaluation {
|
|
||||||
private NamedTable substitutions;
|
|
||||||
|
|
||||||
public SubstitutionEvaluation() {
|
|
||||||
substitutions = new NamedTable();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void update(RodVCF variant, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
|
||||||
if (variant != null) {
|
|
||||||
List<String> altAlleles = variant.getAlternateAlleleList();
|
|
||||||
String altAllele = altAlleles.get(0);
|
|
||||||
String refAllele = String.format("%c", BaseUtils.baseIndexToSimpleBase(ref.getBaseIndex()));
|
|
||||||
|
|
||||||
substitutions.increment(refAllele, altAllele);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getName() {
|
|
||||||
return "Substitution";
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<String> getResult() {
|
|
||||||
ArrayList<String> results = new ArrayList<String>();
|
|
||||||
|
|
||||||
int transitions = 0;
|
|
||||||
int transversions = 0;
|
|
||||||
int unknown = 0;
|
|
||||||
|
|
||||||
for (char rbase : BaseUtils.BASES) {
|
|
||||||
String refAllele = String.format("%c", rbase);
|
|
||||||
|
|
||||||
for (char abase : BaseUtils.BASES) {
|
|
||||||
String altAllele = String.format("%c", abase);
|
|
||||||
|
|
||||||
if (BaseUtils.isTransition((byte)rbase, (byte)abase)) {
|
|
||||||
transitions += substitutions.get(refAllele, altAllele);
|
|
||||||
} else if (BaseUtils.isTransversion((byte)rbase, (byte)abase)) {
|
|
||||||
transversions += substitutions.get(refAllele, altAllele);
|
|
||||||
} else {
|
|
||||||
unknown += substitutions.get(refAllele, altAllele);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
results.add(String.format("%n%s", substitutions.toString()));
|
|
||||||
results.add(String.format("transitions=%d", transitions));
|
|
||||||
results.add(String.format("transversions=%d", transversions));
|
|
||||||
results.add(String.format("unknown=%d", unknown));
|
|
||||||
results.add(String.format("titv=%f", ((double) transitions)/((double) transversions)));
|
|
||||||
|
|
||||||
return results;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,18 +0,0 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public interface VariantEvaluation {
|
|
||||||
public void update(RodVCF variant, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context);
|
|
||||||
|
|
||||||
//public Integer map(
|
|
||||||
|
|
||||||
public String getName();
|
|
||||||
|
|
||||||
public List<String> getResult();
|
|
||||||
}
|
|
||||||
|
|
@ -257,6 +257,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
//System.out.printf("Tracker at %s is %s, ref is %s, skip is %d mapped is %d%n", context.getLocation(), tracker, ref, context.getSkippedBases(), nMappedSites);
|
||||||
nMappedSites += context.getSkippedBases();
|
nMappedSites += context.getSkippedBases();
|
||||||
|
|
||||||
//System.out.printf("Tracker at %s is %s, ref is %s%n", context.getLocation(), tracker, ref);
|
//System.out.printf("Tracker at %s is %s, ref is %s%n", context.getLocation(), tracker, ref);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue