Part I of creating a fast state space lookup for VE

-- Created a unit-tested tree mapping from a List<String> -> integer (StratificationStates).  This class is the key infrastructure necessary to create a complete static mapping from all stratification combinations to an offset in a vector of EvaluationContexts for update in map.
-- Minor code cleanup throughout VE (removing unused headers, for example)
This commit is contained in:
Mark DePristo 2012-03-27 17:13:24 -04:00
parent a36f4570c4
commit a3d896d80e
9 changed files with 486 additions and 5 deletions

View File

@ -269,7 +269,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
// Initialize the set of stratifications and evaluations to use
stratificationObjects = variantEvalUtils.initializeStratificationObjects(this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
Set<Class<? extends VariantEvaluator>> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
for ( VariantStratifier vs : getStratificationObjects() ) {
for ( VariantStratifier vs : stratificationObjects ) {
if ( vs.getName().equals("Filter") )
byFilterIsEnabled = true;
else if ( vs.getName().equals("Sample") )
@ -301,11 +301,12 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
}
}
// initialize CNVs
if ( knownCNVsFile != null ) {
knownCNVsByContig = createIntervalTreeByContig(knownCNVsFile);
}
//createStratificationStates(stratificationObjects);
}
public final Map<String, IntervalTree<GenomeLoc>> createIntervalTreeByContig(final IntervalBinding<Feature> intervals) {

View File

@ -6,7 +6,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;

View File

@ -0,0 +1,58 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import java.util.List;
/**
 * Something that can enumerate, up front, every state it may take on.
 * <p>
 * StratificationStates (same package) calls {@link #getAllStates()} once per
 * tree level and creates one sub-node per returned state, so the returned
 * list defines the branches available at that level of the tree.
 * </p>
 *
 * @author Mark DePristo
 * @since 3/27/12
 */
public interface SetOfStates {
    /**
     * @return every state this object can be in, as strings
     */
    List<String> getAllStates();
}

View File

@ -0,0 +1,118 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
/**
 * Helper class representing a tree of stratification splits, where leaf nodes
 * are given a unique integer key starting at 0 and incrementing up to the
 * number of leaves in the tree.  This allows you to use this tree to produce
 * a key to map into an array index mapped data structure.
 *
 * Suppose I have two strats, each with two values: A = 1, 2 and B = 3, 4
 *
 * This data structure creates a tree such as:
 *
 * <pre>
 * root -> A -> 1 -> B -> 3 : 0
 *               |- B -> 4 : 1
 *      |- A -> 2 -> B -> 3 : 2
 *               |- B -> 4 : 3
 * </pre>
 *
 * This code allows us to efficiently look up a state key (A=2, B=3) and map it
 * to a specific key (an integer) that's unique over the tree
 *
 * @author Mark DePristo
 * @since 3/27/12
 */
public class StratNode<T extends SetOfStates> implements Iterable<StratNode<T>> {
    /** Key of this node; -1 until assigned through {@link #setKey(int)}, which only leaves accept */
    int key = -1;

    /** The object whose states label the edges to the subnodes; null marks a leaf */
    final T stratifier;

    /** Child nodes indexed by state name; empty for a leaf */
    final Map<String, StratNode<T>> subnodes;

    /**
     * Create a leaf node: no stratifier and no subnodes
     */
    public StratNode() {
        this.subnodes = Collections.emptyMap();
        this.stratifier = null;
    }

    /**
     * Create an internal node
     *
     * @param stratifier the object whose states are the keys of subnodes
     * @param subnodes   the child node for each state
     */
    StratNode(final T stratifier, final Map<String, StratNode<T>> subnodes) {
        this.stratifier = stratifier;
        this.subnodes = subnodes;
    }

    /**
     * Assign the key of this leaf node
     *
     * @param key the key to assign
     * @throws ReviewedStingException if this node is not a leaf
     */
    public void setKey(final int key) {
        if ( ! isLeaf() )
            throw new ReviewedStingException("Cannot set key of non-leaf node");
        this.key = key;
    }

    /**
     * Walk down the tree following states.get(offset), states.get(offset+1), ...
     * until a leaf is reached, and return the key of that leaf
     *
     * @param states the state to follow at each level of the tree, outermost level first
     * @param offset index into states of the state to follow from this node
     * @return the key of the leaf reached
     * @throws ReviewedStingException if states runs out before a leaf is reached, or
     *                                if a state has no matching subnode
     */
    public int find(final List<String> states, int offset) {
        if ( isLeaf() ) // we're here!
            return key;

        // fail with a descriptive error instead of a bare IndexOutOfBoundsException
        if ( offset < 0 || offset >= states.size() )
            throw new ReviewedStingException("No state at offset " + offset + " in " + states + " but node " + this + " is not a leaf");

        final String state = states.get(offset);
        final StratNode<T> subnode = subnodes.get(state);
        if ( subnode == null )
            throw new ReviewedStingException("Couldn't find state for " + state + " at node " + this);
        return subnode.find(states, offset+1);
    }

    /**
     * @return the key of this leaf node
     * @throws ReviewedStingException if this node is not a leaf
     */
    public int getKey() {
        if ( ! isLeaf() )
            throw new ReviewedStingException("Cannot get key of non-leaf node");
        return key;
    }

    /**
     * @return the map from state name to child node (the internal map, not a copy)
     */
    protected Map<String, StratNode<T>> getSubnodes() {
        return subnodes;
    }

    /**
     * Number of leaves at or below this node.
     *
     * Computed as (leaves under the first subnode) * (number of subnodes), which
     * assumes every subnode has the same shape.  An internal node without any
     * subnodes has no leaves below it, so its size is 0.
     *
     * @return the number of leaves at or below this node
     */
    public int size() {
        if ( isLeaf() )
            return 1;
        if ( subnodes.isEmpty() ) // nothing below us; iterator().next() would throw
            return 0;
        return subnodes.values().iterator().next().size() * subnodes.size();
    }

    /**
     * @return the stratifier of this node, or null if this node is a leaf
     */
    public T getSetOfStates() {
        return stratifier;
    }

    /**
     * @return true if this node has no stratifier, which is how leaves are represented
     */
    public boolean isLeaf() { return stratifier == null; }

    /**
     * @return an iterator over this node and all nodes below it
     */
    @Override
    public Iterator<StratNode<T>> iterator() {
        return new StratNodeIterator<T>(this);
    }
}

View File

@ -0,0 +1,68 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.*;
/**
 * Helper class for creating iterators over all nodes in the stratification tree
 *
 * The node given to the constructor is returned first, followed by everything
 * produced by the iterator of each of its subnodes, taken in the order of
 * the subnode map's values().
 *
 * @author Mark DePristo
 * @since 3/27/12
 */
class StratNodeIterator<T extends SetOfStates> implements Iterator<StratNode<T>> {
    /** Iterators over each subtree of the root that we have not yet started consuming */
    private final Queue<Iterator<StratNode<T>>> iterators = new LinkedList<Iterator<StratNode<T>>>();

    /** The iterator we are currently drawing nodes from */
    private Iterator<StratNode<T>> currentIterator;

    /**
     * Create an iterator over root and all of the nodes below it
     *
     * Note that iterators for every subtree are created eagerly here.
     *
     * @param root the node to start from
     */
    StratNodeIterator(final StratNode<T> root) {
        currentIterator = Collections.singleton(root).iterator();
        for ( final StratNode<T> subNode : root.subnodes.values() )
            iterators.add(new StratNodeIterator<T>(subNode));
    }

    /**
     * @return true if the current iterator or any pending subtree iterator has nodes left
     */
    @Override
    public boolean hasNext() {
        // every pending sub-iterator yields at least its own root, so a non-empty queue means more nodes
        return currentIterator.hasNext() || ! iterators.isEmpty();
    }

    /**
     * @return the next node in the traversal
     * @throws NoSuchElementException if there are no nodes left, as required by the Iterator contract
     */
    @Override
    public StratNode<T> next() {
        // move on to pending subtree iterators until we find one with nodes left
        while ( ! currentIterator.hasNext() ) {
            final Iterator<StratNode<T>> pending = iterators.poll();
            if ( pending == null )
                throw new NoSuchElementException("Next called on empty iterator");
            currentIterator = pending;
        }
        return currentIterator.next();
    }

    /**
     * Removal is not supported
     *
     * @throws ReviewedStingException always
     */
    @Override
    public void remove() {
        throw new ReviewedStingException("Cannot remove from StratNode iterator");
    }
}

View File

@ -0,0 +1,82 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.*;
/**
 * The complete state space spanned by a list of stratifications: a tree with
 * one level per stratification and one branch per state of that
 * stratification, whose leaves are numbered consecutively starting at 0.
 *
 * @author Mark DePristo
 * @since 3/27/12
 */
public class StratificationStates<T extends SetOfStates> {
    private final StratNode<T> root;

    /**
     * Build the tree for strats and number its leaves
     *
     * @param strats the stratifications, outermost tree level first
     */
    public StratificationStates(final List<T> strats) {
        // work on a private snapshot of the caller's list
        this.root = buildStratificationTree(new ArrayList<T>(strats), 0);
        assignKeys(root, 0);
    }

    /**
     * Recursively build the subtree for the stratifications from depth onwards
     *
     * @param strats all of the stratifications
     * @param depth  index into strats of the stratification handled at this level
     * @return a leaf once the stratifications are exhausted (or a null entry is
     *         encountered), otherwise an internal node with one subtree per state
     */
    private StratNode<T> buildStratificationTree(final List<T> strats, final int depth) {
        final T current = depth < strats.size() ? strats.get(depth) : null;
        if ( current == null )
            return new StratNode<T>(); // bottom of the tree

        final Collection<String> states = current.getAllStates();
        // LinkedHashMap so that subnodes keep the order of the states
        final LinkedHashMap<String, StratNode<T>> children = new LinkedHashMap<String, StratNode<T>>(states.size());
        for ( final String state : states )
            children.put(state, buildStratificationTree(strats, depth + 1));
        return new StratNode<T>(current, children);
    }

    /**
     * @return the size of the root node, i.e. the number of leaves in the tree
     */
    public int getNStates() {
        return root.size();
    }

    /**
     * @return the root of the stratification tree
     */
    public StratNode<T> getRoot() {
        return root;
    }

    /**
     * Look up the key of the leaf reached by following states from the root
     *
     * @param states one state per stratification, in the order given to the constructor
     * @return the key of the corresponding leaf
     */
    public int getKey(final List<String> states) {
        return root.find(states, 0);
    }

    /**
     * Number the leaves at or below root consecutively, in iteration order
     *
     * @param root the node to start from
     * @param key  the key to give to the first leaf encountered
     */
    private void assignKeys(final StratNode<T> root, int key) {
        final Iterator<StratNode<T>> nodes = root.iterator();
        while ( nodes.hasNext() ) {
            final StratNode<T> node = nodes.next();
            if ( node.isLeaf() ) {
                node.setKey(key);
                key++;
            }
        }
    }
}

View File

@ -6,7 +6,6 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public abstract class VariantStratifier implements Comparable<VariantStratifier> {

View File

@ -0,0 +1,157 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
// our package
package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
// the imports for unit testing.
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.FileNotFoundException;
import java.util.*;
/**
 * Unit tests for StratificationStates: checks the number of states, the
 * number of leaves, the uniqueness of the leaf keys, and that every
 * combination of states looks up a distinct, in-range key.
 */
public class StratificationStatesUnitTest extends BaseTest {
    @BeforeClass
    public void init() throws FileNotFoundException {
        // nothing to set up
    }

    // --------------------------------------------------------------------------------
    //
    // Basic tests Provider
    //
    // --------------------------------------------------------------------------------

    /**
     * One test configuration: a list of stratifications, each described by
     * the list of integer states it can take
     */
    private class StratificationStatesTestProvider extends TestDataProvider {
        final List<List<Integer>> allStates;
        final List<ListAsSetOfStates> asSetOfStates = new ArrayList<ListAsSetOfStates>();

        /** Expected size of the state space: the product of the sizes of the state lists */
        final int nStates;

        public StratificationStatesTestProvider(final List<Integer> ... allStates) {
            super(StratificationStatesTestProvider.class);
            this.allStates = Arrays.asList(allStates);

            int nStates = 1;
            for ( List<Integer> states : this.allStates ) {
                nStates *= states.size();
                asSetOfStates.add(new ListAsSetOfStates(states));
            }
            this.nStates = nStates;
        }

        public List<ListAsSetOfStates> getStateSpaceList() {
            return asSetOfStates;
        }

        /**
         * @return every combination of one state per stratification, as strings
         */
        public Queue<List<String>> getAllCombinations() {
            return getAllCombinations(new LinkedList<List<Integer>>(allStates));
        }

        /**
         * Recursively compute the cross product of the remaining state lists.
         * Note that states is consumed by this call.
         */
        private Queue<List<String>> getAllCombinations(Queue<List<Integer>> states) {
            if ( states.isEmpty() ) {
                // The cross product of no lists is a single empty combination.  Returning an
                // empty queue here would make every cross product built on top of it empty too.
                final Queue<List<String>> base = new LinkedList<List<String>>();
                base.add(new LinkedList<String>());
                return base;
            } else {
                List<Integer> head = states.poll();
                Queue<List<String>> substates = getAllCombinations(states);
                Queue<List<String>> newStates = new LinkedList<List<String>>();
                for ( int e : head) {
                    for ( List<String> state : substates ) {
                        List<String> newState = new LinkedList<String>();
                        newState.add(Integer.toString(e));
                        newState.addAll(state);
                        newStates.add(newState);
                    }
                }
                return newStates;
            }
        }
    }

    /**
     * Adapts a list of integers to SetOfStates by converting each integer to a string
     */
    private class ListAsSetOfStates implements SetOfStates {
        final List<String> integers;

        private ListAsSetOfStates(final List<Integer> integers) {
            this.integers = new ArrayList<String>(integers.size());
            for ( int i : integers )
                this.integers.add(Integer.toString(i));
        }

        @Override
        public List<String> getAllStates() {
            return integers;
        }
    }

    @DataProvider(name = "StratificationStatesTestProvider")
    public Object[][] makeStratificationStatesTestProvider() {
        new StratificationStatesTestProvider(Arrays.asList(0));
        new StratificationStatesTestProvider(Arrays.asList(0, 1));
        new StratificationStatesTestProvider(Arrays.asList(0, 1), Arrays.asList(2, 3));
        new StratificationStatesTestProvider(Arrays.asList(0, 1), Arrays.asList(2, 3), Arrays.asList(4, 5));
        new StratificationStatesTestProvider(Arrays.asList(0, 1), Arrays.asList(2, 3, 4), Arrays.asList(5, 6));
        new StratificationStatesTestProvider(Arrays.asList(0, 1), Arrays.asList(2, 3, 4, 5), Arrays.asList(6));
        new StratificationStatesTestProvider(Arrays.asList(0, 1), Arrays.asList(2, 3, 4, 5), Arrays.asList(6, 7));
        new StratificationStatesTestProvider(Arrays.asList(0, 1), Arrays.asList(2, 3), Arrays.asList(4, 5), Arrays.asList(6, 7));
        return StratificationStatesTestProvider.getTests(StratificationStatesTestProvider.class);
    }

    @Test(dataProvider = "StratificationStatesTestProvider")
    public void testStratificationStatesTestProvider(StratificationStatesTestProvider cfg) {
        StratificationStates<ListAsSetOfStates> stratificationStates = new StratificationStates<ListAsSetOfStates>(cfg.getStateSpaceList());

        Assert.assertEquals(stratificationStates.getNStates(), cfg.nStates);

        // the tree must have exactly one leaf per state combination
        int nLeafs = 0;
        for ( final StratNode<ListAsSetOfStates> node : stratificationStates.getRoot() ) {
            if ( node.isLeaf() )
                nLeafs++;
        }
        Assert.assertEquals(nLeafs, cfg.nStates, "Unexpected number of leaves");

        // every leaf must carry a distinct key
        Set<Integer> seenKeys = new HashSet<Integer>(cfg.nStates);
        for ( final StratNode<ListAsSetOfStates> node : stratificationStates.getRoot() ) {
            if ( node.isLeaf() ) {
                Assert.assertFalse(seenKeys.contains(node.getKey()), "Already seen the key");
                seenKeys.add(node.getKey());
            }
        }

        // every state combination must look up a distinct key within [0, nStates)
        seenKeys.clear();
        final Queue<List<String>> combinations = cfg.getAllCombinations();
        Assert.assertEquals(combinations.size(), cfg.nStates, "Unexpected number of state combinations");
        for ( List<String> state : combinations ) {
            final int key = stratificationStates.getKey(state);
            Assert.assertTrue(key >= 0 && key < cfg.nStates, "Key " + key + " is outside of the expected range");
            Assert.assertFalse(seenKeys.contains(key), "Already saw state mapping to this key");
            seenKeys.add(key);
        }
    }
}