VariantEval cleanup. Added VariantType Stratification

-- ArrayList declarations are now List where possible
-- states refactored into VariantStratifier base class (reduces many lines of duplicate code)
-- Added VariantType stratification that partitions report by VariantContext.Type
This commit is contained in:
Mark DePristo 2011-09-07 10:43:53 -04:00
parent 9559115ad5
commit 2f4cf82e3b
16 changed files with 139 additions and 120 deletions

View File

@ -122,9 +122,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
@Argument(fullName="doNotUseAllStandardStratifications", shortName="noST", doc="Do not use the standard stratification modules by default (instead, only those that are specified with the -S option)", required=false)
protected Boolean NO_STANDARD_STRATIFICATIONS = false;
@Argument(fullName="onlyVariantsOfType", shortName="VT", doc="If provided, only variants of these types will be considered during the evaluation, in ", required=false)
protected Set<VariantContext.Type> typesToUse = null;
/**
* See the -list argument to view available modules.
*/
@ -317,9 +314,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
// find the comp
final VariantContext comp = findMatchingComp(eval, compSet);
HashMap<VariantStratifier, ArrayList<String>> stateMap = new HashMap<VariantStratifier, ArrayList<String>>();
HashMap<VariantStratifier, List<String>> stateMap = new HashMap<VariantStratifier, List<String>>();
for ( VariantStratifier vs : stratificationObjects ) {
ArrayList<String> states = vs.getRelevantStates(ref, tracker, comp, compRod.getName(), eval, evalRod.getName(), sampleName);
List<String> states = vs.getRelevantStates(ref, tracker, comp, compRod.getName(), eval, evalRod.getName(), sampleName);
stateMap.put(vs, states);
}

View File

@ -10,10 +10,13 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.List;
/**
* Stratifies the eval RODs by the allele count of the alternate allele
*
* Looks at the AC value in the INFO field, and uses that value if present. If absent,
* computes the AC from the genotypes themselves. If no AC can be computed, 0 is used.
*/
public class AlleleCount extends VariantStratifier {
// needs to know the variant context
private ArrayList<String> states = new ArrayList<String>();
@Override
public void initialize() {
List<RodBinding<VariantContext>> evals = getVariantEvalWalker().getEvals();
@ -35,11 +38,7 @@ public class AlleleCount extends VariantStratifier {
getVariantEvalWalker().getLogger().info("AlleleCount using " + nchrom + " chromosomes");
}
public ArrayList<String> getAllStates() {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>(1);
if (eval != null) {

View File

@ -6,11 +6,15 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.List;
/**
* Stratifies the eval RODs by the allele frequency of the alternate allele
*
* Uses a constant 0.005 frequency grid, and projects the AF INFO field value. Requires
* that AF be present in every ROD, otherwise this stratification throws an exception
*/
public class AlleleFrequency extends VariantStratifier {
// needs to know the variant context
private ArrayList<String> states;
@Override
public void initialize() {
states = new ArrayList<String>();
@ -19,11 +23,7 @@ public class AlleleFrequency extends VariantStratifier {
}
}
public ArrayList<String> getAllStates() {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>();
if (eval != null) {

View File

@ -6,22 +6,21 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.List;
/**
* Required stratification grouping output by each comp ROD
*/
public class CompRod extends VariantStratifier implements RequiredStratification {
private ArrayList<String> states;
@Override
public void initialize() {
states = new ArrayList<String>();
for ( RodBinding<VariantContext> rod : getVariantEvalWalker().getComps() )
states.add(rod.getName());
}
public ArrayList<String> getAllStates() {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>();
relevantStates.add(compName);

View File

@ -5,23 +5,19 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.List;
/**
* Stratifies the evaluation by each contig in the reference sequence
*/
public class Contig extends VariantStratifier {
// needs to know the variant context
private ArrayList<String> states;
@Override
public void initialize() {
states = new ArrayList<String>();
states.addAll(getVariantEvalWalker().getContigNames());
states.add("all");
}
public ArrayList<String> getAllStates() {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>();
if (eval != null) {

View File

@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.List;
/**
* CpG is a stratification module for VariantEval that divides the input data by within/not within a CpG site
@ -19,21 +20,14 @@ import java.util.ArrayList;
* A CpG site is defined as a site where the reference base at a locus is a C and the adjacent reference base in the 3' direction is a G.
*/
public class CpG extends VariantStratifier {
private ArrayList<String> states;
@Override
public void initialize() {
states = new ArrayList<String>();
states.add("all");
states.add("CpG");
states.add("non_CpG");
}
public ArrayList<String> getAllStates() {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
boolean isCpG = false;
if (ref != null && ref.getBases() != null) {
String fwRefBases = new String(ref.getBases());

View File

@ -7,10 +7,12 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
/**
* Experimental stratification by the degeneracy of an amino acid, according to VCF annotation. Not safe
*/
public class Degeneracy extends VariantStratifier {
private ArrayList<String> states;
private HashMap<String, HashMap<Integer, String>> degeneracies;
@Override
@ -77,11 +79,7 @@ public class Degeneracy extends VariantStratifier {
}
}
public ArrayList<String> getAllStates() {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>();
relevantStates.add("all");

View File

@ -6,10 +6,12 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.List;
/**
* Required stratification grouping output by each eval ROD
*/
public class EvalRod extends VariantStratifier implements RequiredStratification {
private ArrayList<String> states;
@Override
public void initialize() {
states = new ArrayList<String>();
@ -17,11 +19,7 @@ public class EvalRod extends VariantStratifier implements RequiredStratification
states.add(rod.getName());
}
public ArrayList<String> getAllStates() {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>();
relevantStates.add(evalName);

View File

@ -5,24 +5,20 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.List;
/**
* Stratifies by the FILTER status (PASS, FAIL) of the eval records
*/
public class Filter extends VariantStratifier {
// needs to know the variant context
private ArrayList<String> states;
@Override
public void initialize() {
states = new ArrayList<String>();
states.add("called");
states.add("filtered");
states.add("raw");
}
public ArrayList<String> getAllStates() {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>();
relevantStates.add("raw");

View File

@ -5,25 +5,22 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.List;
/**
* Stratifies by nonsense, missense, silent, and all annotations in the input ROD, from the INFO field annotation.
*/
public class FunctionalClass extends VariantStratifier {
// needs to know the variant context
private ArrayList<String> states;
@Override
public void initialize() {
states = new ArrayList<String>();
states.add("all");
states.add("silent");
states.add("missense");
states.add("nonsense");
}
public ArrayList<String> getAllStates() {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>();
relevantStates.add("all");

View File

@ -6,30 +6,30 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatc
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import java.util.List;
import java.util.ArrayList;
import java.util.Set;
/**
* Stratifies the eval RODs by user-supplied JEXL expressions
*
* See http://www.broadinstitute.org/gsa/wiki/index.php/Using_JEXL_expressions for more details
*/
public class JexlExpression extends VariantStratifier implements StandardStratification {
// needs to know the jexl expressions
private Set<SortableJexlVCMatchExp> jexlExpressions;
private ArrayList<String> states;
@Override
public void initialize() {
jexlExpressions = getVariantEvalWalker().getJexlExpressions();
states = new ArrayList<String>();
states.add("none");
for ( SortableJexlVCMatchExp jexlExpression : jexlExpressions ) {
states.add(jexlExpression.name);
}
}
public ArrayList<String> getAllStates() {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>();
relevantStates.add("none");

View File

@ -7,32 +7,31 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
/**
* Stratifies by whether a site is in the list of known RODs (e.g., dbsnp by default)
*/
public class Novelty extends VariantStratifier implements StandardStratification {
// needs the variant contexts and known names
private List<RodBinding<VariantContext>> knowns;
final private ArrayList<String> states = new ArrayList<String>(Arrays.asList("all", "known", "novel"));
@Override
public void initialize() {
states = new ArrayList<String>(Arrays.asList("all", "known", "novel"));
knowns = getVariantEvalWalker().getKnowns();
}
public ArrayList<String> getAllStates() {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
if (tracker != null && eval != null) {
final Collection<VariantContext> knownComps = tracker.getValues(knowns, ref.getLocus());
for ( final VariantContext c : knownComps ) {
// loop over sites, looking for something that matches the type of eval
if ( eval.getType() == c.getType() ) {
return new ArrayList<String>(Arrays.asList("all", "known"));
return Arrays.asList("all", "known");
}
}
}
return new ArrayList<String>(Arrays.asList("all", "novel"));
return Arrays.asList("all", "novel");
}
}

View File

@ -4,26 +4,23 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* Stratifies the eval RODs by each sample in the eval ROD.
*
* This allows the system to analyze each sample separately. Since many evaluations
* only consider non-reference sites, stratifying by sample results in meaningful
* calculations for CompOverlap
*/
public class Sample extends VariantStratifier {
// needs the sample names
private ArrayList<String> samples;
@Override
public void initialize() {
samples = new ArrayList<String>();
samples.addAll(getVariantEvalWalker().getSampleNamesForStratification());
states.addAll(getVariantEvalWalker().getSampleNamesForStratification());
}
public ArrayList<String> getAllStates() {
return samples;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>();
relevantStates.add(sampleName);
return relevantStates;
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
return Arrays.asList(sampleName);
}
}

View File

@ -6,9 +6,12 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public abstract class VariantStratifier implements Comparable {
private VariantEvalWalker variantEvalWalker;
protected ArrayList<String> states = new ArrayList<String>();
/**
* @return a reference to the parent VariantEvalWalker running this stratification
@ -27,15 +30,15 @@ public abstract class VariantStratifier implements Comparable {
public abstract void initialize();
public ArrayList<String> getAllStates() {
return new ArrayList<String>();
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
return null;
}
public int compareTo(Object o1) {
return this.getClass().getSimpleName().compareTo(o1.getClass().getSimpleName());
}
public ArrayList<String> getAllStates() {
return states;
}
}

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
/**
 * Stratifies the eval variants by their {@link VariantContext.Type} (SNP, INDEL, etc.).
 *
 * One state is registered per enum constant of VariantContext.Type, and each eval
 * record is assigned to the single state named after its own type.
 */
public class VariantType extends VariantStratifier {
    /**
     * Populates the inherited state list with the string form of every
     * VariantContext.Type constant, in the order returned by values().
     */
    @Override
    public void initialize() {
        final VariantContext.Type[] allTypes = VariantContext.Type.values();
        for ( int i = 0; i < allTypes.length; i++ ) {
            states.add(allTypes[i].toString());
        }
    }

    /**
     * Determines which state the given eval record belongs to.
     *
     * @param eval the eval record being stratified; may be null
     * @return an empty list when eval is null, otherwise a one-element list holding
     *         the string form of eval's type. Only eval is consulted; the remaining
     *         arguments are ignored by this stratification.
     */
    public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
        if ( eval == null ) {
            // nothing to stratify on without an eval record
            return Collections.<String>emptyList();
        }

        final String typeName = eval.getType().toString();
        return Arrays.asList(typeName);
    }
}

View File

@ -266,10 +266,7 @@ public class VariantEvalUtils {
* @return a new VariantContext with just the requested sample
*/
public VariantContext getSubsetOfVariantContext(VariantContext vc, String sampleName) {
ArrayList<String> sampleNames = new ArrayList<String>();
sampleNames.add(sampleName);
return getSubsetOfVariantContext(vc, sampleNames);
return getSubsetOfVariantContext(vc, Arrays.asList(sampleName));
}
/**
@ -371,12 +368,12 @@ public class VariantEvalUtils {
* @param stateKeys all the state keys
* @return a list of state keys
*/
public ArrayList<StateKey> initializeStateKeys(HashMap<VariantStratifier, ArrayList<String>> stateMap, Stack<HashMap<VariantStratifier, ArrayList<String>>> stateStack, StateKey stateKey, ArrayList<StateKey> stateKeys) {
public ArrayList<StateKey> initializeStateKeys(HashMap<VariantStratifier, List<String>> stateMap, Stack<HashMap<VariantStratifier, List<String>>> stateStack, StateKey stateKey, ArrayList<StateKey> stateKeys) {
if (stateStack == null) {
stateStack = new Stack<HashMap<VariantStratifier, ArrayList<String>>>();
stateStack = new Stack<HashMap<VariantStratifier, List<String>>>();
for (VariantStratifier vs : stateMap.keySet()) {
HashMap<VariantStratifier, ArrayList<String>> oneSetOfStates = new HashMap<VariantStratifier, ArrayList<String>>();
HashMap<VariantStratifier, List<String>> oneSetOfStates = new HashMap<VariantStratifier, List<String>>();
oneSetOfStates.put(vs, stateMap.get(vs));
stateStack.add(oneSetOfStates);
@ -384,10 +381,10 @@ public class VariantEvalUtils {
}
if (!stateStack.isEmpty()) {
Stack<HashMap<VariantStratifier, ArrayList<String>>> newStateStack = new Stack<HashMap<VariantStratifier, ArrayList<String>>>();
Stack<HashMap<VariantStratifier, List<String>>> newStateStack = new Stack<HashMap<VariantStratifier, List<String>>>();
newStateStack.addAll(stateStack);
HashMap<VariantStratifier, ArrayList<String>> oneSetOfStates = newStateStack.pop();
HashMap<VariantStratifier, List<String>> oneSetOfStates = newStateStack.pop();
VariantStratifier vs = oneSetOfStates.keySet().iterator().next();
for (String state : oneSetOfStates.get(vs)) {