Massive improvements to VE2 infrastructure. Now supports VCF writing of interesting sites; multiple comp and eval tracks. Eric will be taking it over and expanding functionality over the next few weeks until it's ready to replace VE1

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2832 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-02-12 15:26:52 +00:00
parent 197dd540b5
commit 5f74fffa02
4 changed files with 231 additions and 147 deletions

View File

@ -30,7 +30,7 @@ public class DbSNPPercentage extends VariantEvaluator {
} }
public String getName() { public String getName() {
return "dbsnp_percentage"; return "dbOverlap";
} }
public int getComparisonOrder() { public int getComparisonOrder() {
@ -43,6 +43,15 @@ public class DbSNPPercentage extends VariantEvaluator {
public long nConcordant() { return nConcordant; } public long nConcordant() { return nConcordant; }
public long nNovelSites() { return Math.abs(nEvalSNPs() - nSNPsAtdbSNPs()); } public long nNovelSites() { return Math.abs(nEvalSNPs() - nSNPsAtdbSNPs()); }
/**
* What fraction of the evaluated site variants were also found in the db?
*
* @return
*/
public double dbSNPRate() { return rate(nSNPsAtdbSNPs(), nEvalSNPs()); }
public double concordanceRate() { return rate(nConcordant(), nSNPsAtdbSNPs()); }
public String toString() { public String toString() {
return getName() + ": " + summaryLine(); return getName() + ": " + summaryLine();
} }
@ -53,7 +62,8 @@ public class DbSNPPercentage extends VariantEvaluator {
} }
private static List<String> HEADER = private static List<String> HEADER =
Arrays.asList("n_dbsnps", "n_eval_snps", "n_overlapping_snps", "n_concordant", "n_novel_snps", "dbsnp_rate", "concordance_rate"); Arrays.asList("n_dbsnps", "n_eval_snps", "n_overlapping_snps", "n_concordant",
"n_novel_snps", "percent_eval_in_comp", "concordance_rate");
// making it a table // making it a table
public List<String> getTableHeader() { public List<String> getTableHeader() {
@ -82,15 +92,6 @@ public class DbSNPPercentage extends VariantEvaluator {
return false; return false;
} }
/**
* What fraction of the evaluated site variants were also found in the db?
*
* @return
*/
public double dbSNPRate() { return rate(nSNPsAtdbSNPs(), nEvalSNPs()); }
public double concordanceRate() { return rate(nConcordant(), nSNPsAtdbSNPs()); }
public String update2(VariantContext eval, VariantContext dbsnp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update2(VariantContext eval, VariantContext dbsnp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
boolean dbSNPIsGood = dbsnp != null && dbsnp.isSNP() && dbsnp.isNotFiltered(); boolean dbSNPIsGood = dbsnp != null && dbsnp.isSNP() && dbsnp.isNotFiltered();
boolean evalIsGood = eval != null && eval.isSNP(); boolean evalIsGood = eval != null && eval.isSNP();

View File

@ -40,6 +40,10 @@ public class TiTvVariantEvaluator extends VariantEvaluator {
if ( vc1 != null ) updateTiTv(vc1, false); if ( vc1 != null ) updateTiTv(vc1, false);
if ( vc2 != null ) updateTiTv(vc2, true); if ( vc2 != null ) updateTiTv(vc2, true);
//if ( vc1 == null && vc2 != null && vc2.isSNP() && vc2.isBiallelic() )
// System.out.printf("VC2 = %s%n", vc2);
//if ( vc2 != null && vc2.getName().equals("dbsnp") )
return null; // we don't capture any interesting sites return null; // we don't capture any interesting sites
} }

View File

@ -21,11 +21,10 @@ import java.io.File;
// todo -- evaluations should support comment lines // todo -- evaluations should support comment lines
// todo -- add Mendelian variable explanations (nDeNovo and nMissingTransmissions) // todo -- add Mendelian variable explanations (nDeNovo and nMissingTransmissions)
// todo -- interesting sites should support VCF generation, so that FN, FP, DeNovo, etc calls get put into a single VCF and
// todo -- an explanation added to the INFO field as to why it showed up there.
// //
// todo -- write a simple column table system and have the evaluators return this instead of the list<list<string>> objects // todo -- write a simple column table system and have the evaluators return this instead of the list<list<string>> objects
//
// todo -- site frequency spectrum eval (freq. of variants in eval as a function of their AC and AN numbers) // todo -- site frequency spectrum eval (freq. of variants in eval as a function of their AC and AN numbers)
// todo -- multiple sample concordance tool (genotypes in eval vs. genotypes in truth) // todo -- multiple sample concordance tool (genotypes in eval vs. genotypes in truth)
// todo -- allele frequency discovery tool (FREQ in true vs. discovery counts in eval). Needs to process subset of samples in true (pools) // todo -- allele frequency discovery tool (FREQ in true vs. discovery counts in eval). Needs to process subset of samples in true (pools)
@ -33,10 +32,18 @@ import java.io.File;
// todo -- HWEs // todo -- HWEs
// todo -- Validation data analysis from VE1? What is it and should we transition it over? // todo -- Validation data analysis from VE1? What is it and should we transition it over?
// todo -- indel metrics [count of sizes in/del should be in CountVariants] // todo -- indel metrics [count of sizes in/del should be in CountVariants]
//
// todo -- Performance
// todo -- create JEXL context implementing object that simply looks up values for JEXL evaluations. Throws error for unknown fields // todo -- create JEXL context implementing object that simply looks up values for JEXL evaluations. Throws error for unknown fields
// //
//
// todo -- port over SNP density evaluator. // todo -- port over SNP density evaluator.
// todo -- make it work with intervals correctly
//
// todo -- counts of snps per target [target name, gene, etc]
// todo -- add subgroup of known variants as to those at hapmap sites [it's in the dbSNP record] // todo -- add subgroup of known variants as to those at hapmap sites [it's in the dbSNP record]
@ -58,6 +65,25 @@ import java.io.File;
// todo -- write or find a simple way to organize the table like output of variant eval 2. A generic table of strings? // todo -- write or find a simple way to organize the table like output of variant eval 2. A generic table of strings?
// //
// todo Extend VariantEval, our general-purpose tool for SNP evaluation, to differentiate Ti/Tv at CpG islands and also
// todo classify (and count) variants into coding, non-coding, synonymous/non-synonymous, 2/4 fold degenerate sites, etc.
// todo Assume that the incoming VCF has the annotations (you don't need to do this) but VE2 should split up results by
// todo these categories automatically (using the default selects)
//
// todo -- We agreed to report two standard values for variant evaluation from here out. One, we will continue to report
// todo -- the dbSNP 129 rate. Additionally, we will start to report the % of variants found that have already been seen in
// todo -- 1000 Genomes. This should be implemented as another standard comp_1kg binding, pointing to only variants
// todo -- discovered and released by 1KG. Might need to make this data set ourselves and keep it in GATK/data like
// todo -- dbsnp rod
//
// todo -- aux. plotting routines for VE2
//
// todo -- Provide separate dbsnp rates for het only calls and any call where there is at least one hom-var genotype,
// todo -- since hets are much more likely to be errors
// todo -- Add Heng's hom run metrics
/** /**
* Test routine for new VariantContext object * Test routine for new VariantContext object
*/ */
@ -91,6 +117,9 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
@Argument(shortName="outputVCF", fullName="InterestingSitesVCF", doc="If provided, interesting sites emitted to this vcf and the INFO field annotated as to why they are interesting", required=false) @Argument(shortName="outputVCF", fullName="InterestingSitesVCF", doc="If provided, interesting sites emitted to this vcf and the INFO field annotated as to why they are interesting", required=false)
protected String outputVCF = null; protected String outputVCF = null;
/** Right now we will only be looking at SNPS */
EnumSet<VariantContext.Type> ALLOW_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP);
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
// //
// private walker data // private walker data
@ -98,22 +127,48 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
/** private class holding all of the information about a single evaluation group (e.g., for eval ROD) */ /** private class holding all of the information about a single evaluation group (e.g., for eval ROD) */
private class EvaluationContext extends HashMap<String, Set<VariantEvaluator>> { private class EvaluationContext implements Comparable<EvaluationContext> {
// useful for typing // useful for typing
public String trackName, contextName; public String evalTrackName, compTrackName, novelty, filtered;
public boolean enableInterestingSiteCaptures = false; public boolean enableInterestingSiteCaptures = false;
VariantContextUtils.JexlVCMatchExp selectExp; VariantContextUtils.JexlVCMatchExp selectExp;
Set<VariantEvaluator> evaluations;
public EvaluationContext(String trackName, String contextName, VariantContextUtils.JexlVCMatchExp selectExp, boolean enableInterestingSiteCaptures) { public boolean isIgnoringFilters() { return filtered.equals(RAW_SET_NAME); }
this.trackName = trackName; public boolean requiresFiltered() { return filtered.equals(FILTERED_SET_NAME); }
this.contextName = contextName; public boolean requiresNotFiltered() { return filtered.equals(RETAINED_SET_NAME); }
public boolean isIgnoringNovelty() { return novelty.equals(ALL_SET_NAME); }
public boolean requiresNovel() { return novelty.equals(NOVEL_SET_NAME); }
public boolean requiresKnown() { return novelty.equals(KNOWN_SET_NAME); }
public boolean isSelected() { return selectExp == null; }
public String getDisplayName() {
return Utils.join(".", Arrays.asList(evalTrackName, compTrackName, selectExp == null ? "all" : selectExp.name, filtered, novelty));
}
public int compareTo(EvaluationContext other) {
return this.getDisplayName().compareTo(other.getDisplayName());
}
public EvaluationContext( String evalName, String compName, String novelty, String filtered, VariantContextUtils.JexlVCMatchExp selectExp ) {
this.evalTrackName = evalName;
this.compTrackName = compName;
this.novelty = novelty;
this.filtered = filtered;
this.selectExp = selectExp; this.selectExp = selectExp;
this.enableInterestingSiteCaptures = enableInterestingSiteCaptures; this.enableInterestingSiteCaptures = selectExp == null;
this.evaluations = instantiateEvalationsSet();
} }
} }
private HashMap<String, EvaluationContext> contexts = new HashMap<String, EvaluationContext>(); private List<EvaluationContext> contexts = null;
// lists of all comp and eval ROD track names
private Set<String> compNames = new HashSet<String>(); private Set<String> compNames = new HashSet<String>();
private Set<String> evalNames = new HashSet<String>();
private List<String> variantEvaluationNames = new ArrayList<String>();
private static String RAW_SET_NAME = "raw"; private static String RAW_SET_NAME = "raw";
private static String RETAINED_SET_NAME = "called"; private static String RETAINED_SET_NAME = "called";
@ -122,8 +177,16 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
private static String KNOWN_SET_NAME = "known"; private static String KNOWN_SET_NAME = "known";
private static String NOVEL_SET_NAME = "novel"; private static String NOVEL_SET_NAME = "novel";
private final static String CONTEXT_HEADER = "eval.comp.select.filter.novelty";
private final static int N_CONTEXT_NAME_PARTS = CONTEXT_HEADER.split("\\.").length;
private static int[] nameSizes = new int[N_CONTEXT_NAME_PARTS];
static {
int i = 0;
for ( String elt : CONTEXT_HEADER.split("\\.") )
nameSizes[i++] = elt.length();
}
// Dynamically determined variantEvaluation classes // Dynamically determined variantEvaluation classes
private List<String> variantEvaluationNames = new ArrayList<String>();
private List<Class<? extends VariantEvaluator>> evaluationClasses = null; private List<Class<? extends VariantEvaluator>> evaluationClasses = null;
/** output writer for interesting sites */ /** output writer for interesting sites */
@ -142,10 +205,7 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
for ( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) { for ( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) {
if ( d.getName().startsWith("eval") ) { if ( d.getName().startsWith("eval") ) {
for ( VariantContextUtils.JexlVCMatchExp e : selectExps ) { evalNames.add(d.getName());
addNewContext(d.getName(), d.getName() + "." + e.name, e);
}
addNewContext(d.getName(), d.getName() + ".all", null);
} else if ( d.getName().startsWith("dbsnp") || d.getName().startsWith("hapmap") || d.getName().startsWith("comp") ) { } else if ( d.getName().startsWith("dbsnp") || d.getName().startsWith("hapmap") || d.getName().startsWith("comp") ) {
compNames.add(d.getName()); compNames.add(d.getName());
} else { } else {
@ -153,12 +213,14 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
} }
} }
contexts = initializeEvaluationContexts(evalNames, compNames, selectExps);
determineContextNamePartSizes(); determineContextNamePartSizes();
if ( outputVCF != null ) if ( outputVCF != null )
writer = new VCFWriter(new File(outputVCF)); writer = new VCFWriter(new File(outputVCF));
} }
private void determineAllEvalations() { private void determineAllEvalations() {
evaluationClasses = PackageUtils.getClassesImplementingInterface(VariantEvaluator.class); evaluationClasses = PackageUtils.getClassesImplementingInterface(VariantEvaluator.class);
for ( VariantEvaluator e : instantiateEvalationsSet() ) { for ( VariantEvaluator e : instantiateEvalationsSet() ) {
@ -170,6 +232,33 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
Collections.sort(variantEvaluationNames); Collections.sort(variantEvaluationNames);
} }
private <T> List<T> append(List<T> selectExps, T elt) {
List<T> l = new ArrayList<T>(selectExps);
l.add(elt);
return l;
}
private List<EvaluationContext> initializeEvaluationContexts(Set<String> evalNames, Set<String> compNames, List<VariantContextUtils.JexlVCMatchExp> selectExps) {
List<EvaluationContext> contexts = new ArrayList<EvaluationContext>();
selectExps = append(selectExps, null);
for ( String evalName : evalNames ) {
for ( String compName : compNames ) {
for ( VariantContextUtils.JexlVCMatchExp e : selectExps ) {
for ( String filteredName : Arrays.asList(RAW_SET_NAME, RETAINED_SET_NAME, FILTERED_SET_NAME) ) {
for ( String novelty : Arrays.asList(ALL_SET_NAME, KNOWN_SET_NAME, NOVEL_SET_NAME) ) {
EvaluationContext context = new EvaluationContext(evalName, compName, novelty, filteredName, e);
contexts.add(context);
}
}
}
}
}
Collections.sort(contexts);
return contexts;
}
private Set<VariantEvaluator> instantiateEvalationsSet() { private Set<VariantEvaluator> instantiateEvalationsSet() {
Set<VariantEvaluator> evals = new HashSet<VariantEvaluator>(); Set<VariantEvaluator> evals = new HashSet<VariantEvaluator>();
Object[] args = new Object[]{this}; Object[] args = new Object[]{this};
@ -194,24 +283,11 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
return evals; return evals;
} }
private void addNewContext(String trackName, String contextName, VariantContextUtils.JexlVCMatchExp selectExp) {
EvaluationContext group = new EvaluationContext(trackName, contextName, selectExp, selectExp == null);
for ( String filteredName : Arrays.asList(RAW_SET_NAME, RETAINED_SET_NAME, FILTERED_SET_NAME) ) {
for ( String subname : Arrays.asList(ALL_SET_NAME, KNOWN_SET_NAME, NOVEL_SET_NAME) ) {
String name = subname + "." + filteredName;
//System.out.printf("Creating group name: " + name);
group.put(name, instantiateEvalationsSet());
//group.put(name, instantiateEvalationsSet(subname == ALL_SET_NAME && filteredName == RETAINED_SET_NAME, trackName + "." + (selectExp == null ? "all" : selectExp.name) + "." + name));
}
}
contexts.put(contextName, group); private boolean captureInterestingSitesOfEvalSet(EvaluationContext group) {
}
private boolean captureInterestingSitesOfEvalSet(String name) {
//System.out.printf("checking %s%n", name); //System.out.printf("checking %s%n", name);
return name.contains(ALL_SET_NAME + "." + RETAINED_SET_NAME); return group.requiresNotFiltered() && group.isIgnoringNovelty();
} }
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
@ -220,24 +296,24 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
// //
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
// todo -- call a single function to build a map from track name -> variant context / null for all
// -- eval + comp names. Use this data structure to get data throughout rest of the loops here
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
//System.out.printf("map at %s with %d skipped%n", context.getLocation(), context.getSkippedBases()); //System.out.printf("map at %s with %d skipped%n", context.getLocation(), context.getSkippedBases());
if ( ref == null ) if ( ref == null )
return 0; return 0;
Collection<VariantContext> comps = getCompVariantContexts(tracker, context); Map<String, VariantContext> vcs = getVariantContexts(tracker, context);
//Collection<VariantContext> comps = getCompVariantContexts(tracker, context);
// to enable walking over pairs where eval or comps have no elements // to enable walking over pairs where eval or comps have no elements
for ( EvaluationContext group : contexts.values() ) { for ( EvaluationContext group : contexts ) {
VariantContext vc = getEvalContext(group.trackName, tracker, context); VariantContext vc = vcs.get(group.evalTrackName);
//logger.debug(String.format("Updating %s of %s with variant", group.name, vc)); //logger.debug(String.format("Updating %s of %s with variant", group.name, vc));
Set<VariantEvaluator> evaluations = group.evaluations;
for ( Map.Entry<String, Set<VariantEvaluator>> namedEvaluations : group.entrySet() ) { boolean evalWantsVC = applyVCtoEvaluation(vc, vcs, group);
String evaluationName = namedEvaluations.getKey();
Set<VariantEvaluator> evaluations = namedEvaluations.getValue();
boolean evalWantsVC = applyVCtoEvaluation(evaluationName, vc, comps, group);
List<String> interestingReasons = new ArrayList<String>(); List<String> interestingReasons = new ArrayList<String>();
for ( VariantEvaluator evaluation : evaluations ) { for ( VariantEvaluator evaluation : evaluations ) {
@ -254,10 +330,9 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
} }
break; break;
case 2: case 2:
for ( VariantContext comp : comps ) { VariantContext comp = vcs.get(group.compTrackName);
String interesting = evaluation.update2( evalWantsVC ? vc : null, comp, tracker, ref, context); String interesting = evaluation.update2( evalWantsVC ? vc : null, comp, tracker, ref, context );
if ( interesting != null ) interestingReasons.add(interesting); if ( interesting != null ) interestingReasons.add(interesting);
}
break; break;
default: default:
throw new StingException("BUG: Unexpected evaluation order " + evaluation); throw new StingException("BUG: Unexpected evaluation order " + evaluation);
@ -265,10 +340,9 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
} }
} }
if ( group.enableInterestingSiteCaptures && captureInterestingSitesOfEvalSet(evaluationName) ) if ( group.enableInterestingSiteCaptures && captureInterestingSitesOfEvalSet(group) )
writeInterestingSite(interestingReasons, vc); writeInterestingSite(interestingReasons, vc);
} }
}
return 0; return 0;
} }
@ -307,20 +381,20 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
} }
} }
private boolean applyVCtoEvaluation(String evaluationName, VariantContext vc, Collection<VariantContext> comps, EvaluationContext group) { private boolean applyVCtoEvaluation(VariantContext vc, Map<String, VariantContext> vcs, EvaluationContext group) {
if ( vc == null ) if ( vc == null )
return true; return true;
if ( evaluationName.contains(FILTERED_SET_NAME) && vc.isNotFiltered() ) if ( group.requiresFiltered() && vc.isNotFiltered() )
return false; return false;
if ( evaluationName.contains(RETAINED_SET_NAME) && vc.isFiltered() ) if ( group.requiresNotFiltered() && vc.isFiltered() )
return false; return false;
boolean vcKnown = vcIsKnown(vc, comps, KNOWN_NAMES); boolean vcKnown = vcIsKnown(vc, vcs, KNOWN_NAMES);
if ( evaluationName.contains(KNOWN_SET_NAME) && ! vcKnown ) if ( group.requiresKnown() && ! vcKnown )
return false; return false;
else if ( evaluationName.contains(NOVEL_SET_NAME) && vcKnown ) else if ( group.requiresNovel() && vcKnown )
return false; return false;
if ( group.selectExp != null && ! VariantContextUtils.match(vc, group.selectExp) ) if ( group.selectExp != null && ! VariantContextUtils.match(vc, group.selectExp) )
@ -330,16 +404,13 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
return true; return true;
} }
private boolean vcIsKnown(VariantContext vc, Collection<VariantContext> comps, String[] knownNames ) { private boolean vcIsKnown(VariantContext vc, Map<String, VariantContext> vcs, String[] knownNames ) {
for ( VariantContext comp : comps ) {
if ( comp.isNotFiltered() && comp.getType() == vc.getType() ) {
for ( String knownName : knownNames ) { for ( String knownName : knownNames ) {
if ( comp.getName().equals(knownName) ) { VariantContext known = vcs.get(knownName);
if ( known != null && known.isNotFiltered() && known.getType() == vc.getType() ) {
return true; return true;
} }
} }
}
}
return false; return false;
} }
@ -351,23 +422,23 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
// //
//logger.info(String.format("Ignore second+ events at locus %s in rod %s => rec is %s", context.getLocation(), rodList.getName(), rec)); //logger.info(String.format("Ignore second+ events at locus %s in rod %s => rec is %s", context.getLocation(), rodList.getName(), rec));
private Collection<VariantContext> getCompVariantContexts(RefMetaDataTracker tracker, AlignmentContext context) { private Map<String, VariantContext> getVariantContexts(RefMetaDataTracker tracker, AlignmentContext context) {
// todo -- we need to deal with dbSNP where there can be multiple records at the same start site. A potential solution is to // todo -- we need to deal with dbSNP where there can be multiple records at the same start site. A potential solution is to
// todo -- allow the variant evaluation to specify the type of variants it wants to see and only take the first such record at a site // todo -- allow the variant evaluation to specify the type of variants it wants to see and only take the first such record at a site
Collection<VariantContext> comps = tracker.getVariantContexts(compNames, null, context.getLocation(), true, true); Map<String, VariantContext> bindings = new HashMap<String, VariantContext>();
bindVariantContexts(bindings, evalNames, tracker, context);
// todo -- remove me when the loop works correctly for comparisons of eval x comp for each comp bindVariantContexts(bindings, compNames, tracker, context);
if ( comps.size() > 1 ) throw new StingException("VariantEval2 currently only supports comparisons of N eval tracks vs. a single comparison track. Yes, I know..."); return bindings;
return comps;
} }
private VariantContext getEvalContext(String name, RefMetaDataTracker tracker, AlignmentContext context) { private void bindVariantContexts(Map<String, VariantContext> map, Collection<String> names,
Collection<VariantContext> contexts = tracker.getVariantContexts(name, null, context.getLocation(), true, false); RefMetaDataTracker tracker, AlignmentContext context ) {
for ( String name : names ) {
Collection<VariantContext> contexts = tracker.getVariantContexts(name, ALLOW_VARIANT_CONTEXT_TYPES, context.getLocation(), true, true);
if ( context.size() > 1 ) if ( context.size() > 1 )
throw new StingException("Found multiple variant contexts at " + context.getLocation()); throw new StingException("Found multiple variant contexts at " + context.getLocation());
map.put(name, contexts.size() == 1 ? contexts.iterator().next() : null);
return contexts.size() == 1 ? contexts.iterator().next() : null; }
} }
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
@ -390,30 +461,17 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
return null; return null;
} }
private final static String CONTEXT_HEADER = "track.subset.novelty.filter";
private final static int N_CONTEXT_NAME_PARTS = CONTEXT_HEADER.split("\\.").length;
private static int[] nameSizes = new int[N_CONTEXT_NAME_PARTS];
static {
int i = 0;
for ( String elt : CONTEXT_HEADER.split("\\.") )
nameSizes[i++] = elt.length();
}
private void determineContextNamePartSizes() { private void determineContextNamePartSizes() {
for ( String contextName : Utils.sorted(contexts.keySet()) ) { for ( EvaluationContext group : contexts ) {
EvaluationContext group = contexts.get(contextName); String[] parts = group.getDisplayName().split("\\.");
for ( String evalSubgroupName : Utils.sorted(group.keySet()) ) {
String keyWord = contextName + "." + evalSubgroupName;
String[] parts = keyWord.split("\\.");
if ( parts.length != N_CONTEXT_NAME_PARTS ) { if ( parts.length != N_CONTEXT_NAME_PARTS ) {
throw new StingException("Unexpected number of eval name parts " + keyWord + " length = " + parts.length + ", expected " + N_CONTEXT_NAME_PARTS); throw new StingException("Unexpected number of eval name parts " + group.getDisplayName() + " length = " + parts.length + ", expected " + N_CONTEXT_NAME_PARTS);
} else { } else {
for ( int i = 0; i < parts.length; i++ ) for ( int i = 0; i < parts.length; i++ )
nameSizes[i] = Math.max(nameSizes[i], parts[i].length()); nameSizes[i] = Math.max(nameSizes[i], parts[i].length());
} }
} }
} }
}
private String formatKeyword(String keyWord) { private String formatKeyword(String keyWord) {
//System.out.printf("keyword %s%n", keyWord); //System.out.printf("keyword %s%n", keyWord);
@ -422,7 +480,7 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
int i = 0; int i = 0;
for ( String part : keyWord.split("\\.") ) { for ( String part : keyWord.split("\\.") ) {
//System.out.printf("part %s %d%n", part, nameSizes[i]); //System.out.printf("part %s %d%n", part, nameSizes[i]);
s.append(String.format("%"+nameSizes[i]+"s ", part)); s.append(String.format("%" + nameSizes[i] + "s ", part));
i++; i++;
} }
@ -436,17 +494,19 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
for ( String evalName : variantEvaluationNames ) { for ( String evalName : variantEvaluationNames ) {
boolean first = true; boolean first = true;
out.printf("%n%n"); out.printf("%n%n");
// todo -- show that comp is dbsnp, etc. is columns // todo -- show that comp is dbsnp, etc. is columns
for ( String contextName : Utils.sorted(contexts.keySet()) ) { String lastEvalTrack = null;
EvaluationContext group = contexts.get(contextName); for ( EvaluationContext group : contexts ) {
if ( lastEvalTrack == null || ! lastEvalTrack.equals(group.evalTrackName) ) {
out.printf("%s%n", Utils.dupString('-', 80)); out.printf("%s%n", Utils.dupString('-', 80));
for ( String evalSubgroupName : Utils.sorted(group.keySet()) ) { lastEvalTrack = group.evalTrackName;
Set<VariantEvaluator> evalSet = group.get(evalSubgroupName); }
VariantEvaluator eval = getEvalByName(evalName, evalSet);
String keyWord = contextName + "." + evalSubgroupName;
if ( eval.enabled() ) {
VariantEvaluator eval = getEvalByName(evalName, group.evaluations);
String keyWord = group.getDisplayName();
if ( eval.enabled() ) {
if ( first ) { if ( first ) {
out.printf("%20s %s %s%n", evalName, formatKeyword(CONTEXT_HEADER), Utils.join("\t", eval.getTableHeader())); out.printf("%20s %s %s%n", evalName, formatKeyword(CONTEXT_HEADER), Utils.join("\t", eval.getTableHeader()));
first = false; first = false;
@ -458,7 +518,6 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
} }
} }
} }
}
protected Logger getLogger() { return logger; } protected Logger getLogger() { return logger; }
} }

View File

@ -14,17 +14,14 @@ public class VariantEval2IntegrationTest extends WalkerTest {
" -R " + oneKGLocation + "reference/human_b36_both.fasta"; " -R " + oneKGLocation + "reference/human_b36_both.fasta";
private static String root = cmdRoot + private static String root = cmdRoot +
" -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + " -D " + GATKDataLocation + "dbsnp_129_b36.rod" +
" -B eval,VCF,/humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf"; " -B eval,VCF," + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf";
static HashMap<String, String> expectations = new HashMap<String, String>();
static {
expectations.put("-L 1:1-10,000,000", "e7dba09b1856b9be86816939596a5062");
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "c170c672ca2ef86068cc5dee9aaac022");
}
@Test @Test
public void testVE2Simple() { public void testVE2Simple() {
HashMap<String, String> expectations = new HashMap<String, String>();
expectations.put("-L 1:1-10,000,000", "d58a2a22e5fb3a3d8d90ba02de37f62b");
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "8a928c8ad99428445e53b0b83f8ccdfa");
for ( Map.Entry<String, String> entry : expectations.entrySet() ) { for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
String extraArgs = entry.getKey(); String extraArgs = entry.getKey();
@ -37,12 +34,35 @@ public class VariantEval2IntegrationTest extends WalkerTest {
} }
} }
@Test
public void testVE2Complex() {
HashMap<String, String> expectations = new HashMap<String, String>();
String extraArgs1 = "-L " + validationDataLocation + "chr1_b36_pilot3.interval_list -family NA19238+NA19239=NA19240 -MVQ 30" +
" -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" +
" -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf";
String eqMD5s = "380e082222111c7bf962095d9afca8da"; // next two examples should be the same!
expectations.put("", eqMD5s);
expectations.put(" -known comp_hapmap -known dbsnp", eqMD5s);
expectations.put(" -known comp_hapmap", "90d7d4d0ff370e9457978b2869782aa0");
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
String extraArgs2 = entry.getKey();
String md5 = entry.getValue();
WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs1 + extraArgs2 + " -o %s",
1, // just one output file
Arrays.asList(md5));
executeTest("testVE2Complex", spec);
}
}
@Test @Test
public void testVE2WriteVCF() { public void testVE2WriteVCF() {
String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30"; String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30";
WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s", WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s",
2, 2,
Arrays.asList("c53d7638df2d7440dee1fd274d1f6384", "9ec81f7389c0971e44e4b8d2d4af3008")); Arrays.asList("b7d52d13e6eb3d593395a644583e449a", "9ec81f7389c0971e44e4b8d2d4af3008"));
executeTest("testVE2WriteVCF", spec); executeTest("testVE2WriteVCF", spec);
} }
} }