Lots of new functionality for StratificationStates manager

-- Really working according to unit tests
-- Added nCombinations utilities to Utils
This commit is contained in:
Mark DePristo 2012-03-28 12:55:29 -04:00
parent 91c5353c4c
commit 9f1cd0ff66
5 changed files with 207 additions and 81 deletions

View File

@ -27,32 +27,14 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import java.util.List;
/**
* [Short one sentence description of this walker]
* <p/>
* <p>
* [Functionality of this walker]
* </p>
* <p/>
* <h2>Input</h2>
* <p>
* [Input description]
* </p>
* <p/>
* <h2>Output</h2>
* <p>
* [Output description]
* </p>
* <p/>
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T $WalkerName
* </pre>
*
* @author Your Name
* @since Date created
*/
public interface SetOfStates {
public List<String> getAllStates();
* A basic interface for a class to be used with the StratificationStates system
*
* @author Mark DePristo
* @since 3/28/12
*/
public interface SetOfStates<T> {
    // T is the type of the individual state objects handed out by this provider.
    // It is deliberately not called "Object": a type parameter with that name
    // shadows java.lang.Object inside the interface and makes List<Object> mean
    // "list of the type parameter" rather than "list of java.lang.Object".

    /**
     * @return a list of all of the state objects that may be provided by this states provider
     */
    public List<T> getAllStates();
}

View File

@ -24,12 +24,12 @@
package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
* Helper class representing a tree of stratification splits, where leaf nodes
@ -49,35 +49,48 @@ import java.util.Map;
* This code allows us to efficiently look up a state key (A=2, B=3) and map it
* to a specific key (an integer) that's unique over the tree
*
* Note that in this tree the keys are -1 for all internal nodes, and leaves are
* the only nodes with meaningful keys. So for a tree with 2N nodes, N of these
* will be internal, with no keys and meaningful maps from states -> subtrees. The
* other N nodes are leaves, with meaningful keys, empty maps, and null stratification objects.
*
* @author Mark DePristo
* @since 3/27/12
*/
public class StratNode<T extends SetOfStates> implements Iterable<StratNode<T>> {
@Invariant({
"(isLeaf() && stratifier == null && subnodes.isEmpty()) || (!isLeaf() && stratifier != null && !subnodes.isEmpty())"})
class StratNode<T extends SetOfStates> implements Iterable<StratNode<T>> {
int key = -1;
final T stratifier;
final Map<String, StratNode<T>> subnodes;
final Map<Object, StratNode<T>> subnodes;
public StratNode() {
protected StratNode() {
this.subnodes = Collections.emptyMap();
this.stratifier = null;
}
StratNode(final T stratifier, final Map<String, StratNode<T>> subnodes) {
protected StratNode(final T stratifier, final Map<Object, StratNode<T>> subnodes) {
this.stratifier = stratifier;
this.subnodes = subnodes;
}
/**
 * Stores the key of this node; only leaf nodes may carry a key.
 *
 * @param key the key to assign to this leaf
 * @throws ReviewedStingException if this node is not a leaf
 */
@Requires("key >= 0")
public void setKey(final int key) {
    if ( isLeaf() ) {
        this.key = key;
    } else {
        throw new ReviewedStingException("Cannot set key of non-leaf node");
    }
}
public int find(final List<String> states, int offset) {
@Requires({
"states != null",
"offset >= 0",
"offset <= states.size()"
})
public int find(final List<Object> states, int offset) {
if ( isLeaf() ) // we're here!
return key;
else {
final String state = states.get(offset);
final Object state = states.get(offset);
StratNode<T> subnode = subnodes.get(state);
if ( subnode == null )
throw new ReviewedStingException("Couldn't find state for " + state + " at node " + this);
@ -86,6 +99,28 @@ public class StratNode<T extends SetOfStates> implements Iterable<StratNode<T>>
}
}
/**
 * Collects the keys of every leaf that can be reached from this node when, at each
 * level of the tree, any of the states listed for that level may be followed.
 *
 * @param multipleStates for each level, the list of states to follow at that level
 * @param offset the index into multipleStates that applies to this node
 * @param keys receives the key of each leaf that is reached
 * @throws ReviewedStingException if a listed state has no subnode at this node
 */
@Requires({
"multipleStates != null",
"offset >= 0",
"offset <= multipleStates.size()",
"keys != null",
"offset == multipleStates.size() || multipleStates.get(offset) != null"})
public void find(final List<List<Object>> multipleStates, final int offset, final HashSet<Integer> keys) {
    if ( isLeaf() ) {
        // bottom of the tree: this leaf's key is one of the answers
        keys.add(key);
        return;
    }

    // descend into the subtree of every state requested at this level
    for ( final Object candidate : multipleStates.get(offset) ) {
        final StratNode<T> child = subnodes.get(candidate);
        if ( child == null ) {
            throw new ReviewedStingException("Couldn't find state for " + candidate + " at node " + this);
        }
        child.find(multipleStates, offset + 1, keys);
    }
}
@Ensures("result >= 0")
public int getKey() {
if ( ! isLeaf() )
throw new ReviewedStingException("Cannot get key of non-leaf node");
@ -93,10 +128,11 @@ public class StratNode<T extends SetOfStates> implements Iterable<StratNode<T>>
return key;
}
protected Map<String, StratNode<T>> getSubnodes() {
protected Map<Object, StratNode<T>> getSubnodes() {
return subnodes;
}
@Ensures("result >= 0")
public int size() {
if ( isLeaf() )
return 1;
@ -109,9 +145,19 @@ public class StratNode<T extends SetOfStates> implements Iterable<StratNode<T>>
return stratifier;
}
public boolean isLeaf() { return stratifier == null; }
/**
* Tests whether this node is a leaf, which is the case exactly when it has no stratifier.
*
* @return true if this node is a leaf
*/
public boolean isLeaf() {
return stratifier == null;
}
/**
* Returns an iterator over this node and all subnodes including internal and leaf nodes
*
* @return a new StratNodeIterator created from this node
*/
@Override
@Ensures("result != null")
public Iterator<StratNode<T>> iterator() {
return new StratNodeIterator<T>(this);
}

View File

@ -50,9 +50,9 @@ public class StratificationStates<T extends SetOfStates> {
return new StratNode<T>();
} else {
// we are in the middle of the tree
final Collection<String> states = first.getAllStates();
final LinkedHashMap<String, StratNode<T>> subNodes = new LinkedHashMap<String, StratNode<T>>(states.size());
for ( final String state : states ) {
final Collection<Object> states = first.getAllStates();
final LinkedHashMap<Object, StratNode<T>> subNodes = new LinkedHashMap<Object, StratNode<T>>(states.size());
for ( final Object state : states ) {
// have to copy because poll modifies the queue
final Queue<T> copy = new LinkedList<T>(strats);
subNodes.put(state, buildStratificationTree(copy));
@ -64,19 +64,38 @@ public class StratificationStates<T extends SetOfStates> {
/**
* @return the size of the tree as reported by root.size()
*         (NOTE(review): presumably the number of leaf states -- confirm against StratNode.size())
*/
public int getNStates() {
return root.size();
}
/**
* @return the root node of the stratification tree held by this object
*/
public StratNode<T> getRoot() {
return root;
}
public int getKey(final List<String> states) {
/**
* Looks up the key for a single combination of states by searching from the root.
*
* @param states the states to look up; the search starts at index 0 of this list
* @return the key returned by root.find(states, 0)
*/
public int getKey(final List<Object> states) {
return root.find(states, 0);
}
/**
 * Looks up the keys matching a query in which several states may be given per position.
 *
 * @param allStates for each position, the list of states to search for at that position
 * @return a newly allocated set holding every key added by the search from the root
 */
public Set<Integer> getKeys(final List<List<Object>> allStates) {
    final HashSet<Integer> matchingKeys = new HashSet<Integer>();
    // the search always begins with the first entry of allStates
    final int startOffset = 0;
    root.find(allStates, startOffset, matchingKeys);
    return matchingKeys;
}
/**
 * Numbers the leaves found while iterating over root with consecutive keys.
 *
 * @param root the node whose iteration supplies the nodes to number
 * @param key the key given to the first leaf encountered; each later leaf gets the next integer
 */
private void assignKeys(final StratNode<T> root, int key) {
    int nextKey = key;
    for ( final StratNode<T> current : root ) {
        if ( ! current.isLeaf() ) {
            continue; // internal nodes carry no key
        }
        current.setKey(nextKey);
        nextKey++;
    }
}
/**
 * Merges two state lists position by position into a multi-state query.
 *
 * For every index i of first, the result holds a one-element list when the two
 * states at i are equal (or both null), and a two-element list [first[i], second[i]]
 * otherwise.
 *
 * Only the first first.size() entries of second are read: any extra entries in
 * second are ignored, and a shorter second makes second.get(i) fail.
 *
 * @param first the first list of states; its size determines the size of the result
 * @param second the second list of states; must have at least first.size() entries
 * @return a new list with one entry per position of first
 * @throws IndexOutOfBoundsException if second has fewer entries than first
 */
public static List<List<Object>> combineStates(final List<Object> first, final List<Object> second) {
    final List<List<Object>> combined = new ArrayList<List<Object>>(first.size());
    for ( int i = 0; i < first.size(); i++ ) {
        final Object firstI = first.get(i);
        final Object secondI = second.get(i);
        // null-safe comparison: calling equals() directly on a null state would throw
        // a NullPointerException
        final boolean sameState = firstI == null ? secondI == null : firstI.equals(secondI);
        if ( sameState )
            combined.add(Collections.singletonList(firstI));
        else
            combined.add(Arrays.asList(firstI, secondI));
    }
    return combined;
}
}

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.utils;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMProgramRecord;
import net.sf.samtools.util.StringUtil;
@ -710,4 +711,36 @@ public class Utils {
}
return list;
}
/**
* Returns the number of combinations represented by this collection
* of collection of options.
*
* For example, if this is [[A, B], [C, D], [E, F, G]] returns 2 * 2 * 3 = 12
*
* @param options
* @param <T>
* @return
*/
@Requires("options != null")
public static <T> int nCombinations(final Collection<T>[] options) {
    // start from the multiplicative identity, so an empty array yields 1
    int product = 1;
    for ( int i = 0; i < options.length; i++ ) {
        // each collection contributes one independent choice among its elements
        product *= options[i].size();
    }
    return product;
}
@Requires("options != null")
public static <T> int nCombinations(final List<List<T>> options) {
    // unlike the array overload, an empty list of options counts as zero combinations
    if ( options.isEmpty() ) {
        return 0;
    }

    // otherwise the count is the product of the sizes of the individual option lists
    int product = 1;
    for ( final List<T> choices : options ) {
        product *= choices.size();
    }
    return product;
}
}

View File

@ -30,6 +30,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
@ -51,46 +52,58 @@ public class StratificationStatesUnitTest extends BaseTest {
// --------------------------------------------------------------------------------
private class StratificationStatesTestProvider extends TestDataProvider {
final List<List<Integer>> allStates;
final List<List<Object>> allStates = new ArrayList<List<Object>>();
final List<ListAsSetOfStates> asSetOfStates = new ArrayList<ListAsSetOfStates>();
final int nStates;
public StratificationStatesTestProvider(final List<Integer> ... allStates) {
super(StratificationStatesTestProvider.class);
this.allStates = Arrays.asList(allStates);
for ( List<Integer> states : allStates ) {
this.allStates.add(new ArrayList<Object>(states));
}
int nStates = 1;
for ( List<Integer> states : this.allStates ) {
nStates *= states.size();
for ( List<Object> states : this.allStates ) {
asSetOfStates.add(new ListAsSetOfStates(states));
}
this.nStates = nStates;
}
// private String getName() {
// return String.format("probs=%s expectedRegions=%s", Utils.join(",", probs), Utils.join(",", expectedRegions));
// }
this.nStates = Utils.nCombinations(allStates);
setName(getName());
}
/**
 * Builds a label for this provider that lists each state list with its 1-based position.
 *
 * @return the concatenation of "position = [comma-joined states] " for every entry of allStates
 */
private String getName() {
    final StringBuilder label = new StringBuilder();
    for ( int i = 0; i < allStates.size(); i++ ) {
        final String joined = Utils.join(",", allStates.get(i));
        // positions are reported starting from 1
        label.append(String.format("%d = [%s] ", i + 1, joined));
    }
    return label.toString();
}
/**
* @return the list of ListAsSetOfStates wrappers built by the constructor, one per state list
*/
public List<ListAsSetOfStates> getStateSpaceList() {
return asSetOfStates;
}
public Queue<List<String>> getAllCombinations() {
return getAllCombinations(new LinkedList<List<Integer>>(allStates));
/**
* Enumerates combinations of the states by delegating to the recursive helper.
*
* @return the queue produced by the helper for a LinkedList copy of allStates
*/
public Queue<List<Object>> getAllCombinations() {
// hand the helper a copy: it polls from the queue it is given, which would otherwise empty allStates
return getAllCombinations(new LinkedList<List<Object>>(allStates));
}
private Queue<List<String>> getAllCombinations(Queue<List<Integer>> states) {
private Queue<List<Object>> getAllCombinations(Queue<List<Object>> states) {
if ( states.isEmpty() )
return new LinkedList<List<String>>();
return new LinkedList<List<Object>>();
else {
List<Integer> head = states.poll();
Queue<List<String>> substates = getAllCombinations(states);
Queue<List<String>> newStates = new LinkedList<List<String>>();
for ( int e : head) {
for ( List<String> state : substates ) {
List<String> newState = new LinkedList<String>();
newState.add(Integer.toString(e));
newState.addAll(state);
newStates.add(newState);
List<Object> head = states.poll();
Queue<List<Object>> substates = getAllCombinations(states);
Queue<List<Object>> newStates = new LinkedList<List<Object>>();
for ( final Object e : head) {
if ( substates.isEmpty() ) {
newStates.add(new LinkedList<Object>(Collections.singleton(e)));
} else {
for ( final List<Object> state : substates ) {
List<Object> newState = new LinkedList<Object>();
newState.add(e);
newState.addAll(state);
newStates.add(newState);
}
}
}
return newStates;
@ -99,16 +112,14 @@ public class StratificationStatesUnitTest extends BaseTest {
}
private class ListAsSetOfStates implements SetOfStates {
final List<String> integers;
final List<Object> integers;
private ListAsSetOfStates(final List<Integer> integers) {
this.integers = new ArrayList<String>(integers.size());
for ( int i : integers )
this.integers.add(Integer.toString(i));
private ListAsSetOfStates(final List<Object> integers) {
this.integers = integers;
}
@Override
public List<String> getAllStates() {
public List<Object> getAllStates() {
return integers;
}
}
@ -127,8 +138,8 @@ public class StratificationStatesUnitTest extends BaseTest {
}
@Test(dataProvider = "StratificationStatesTestProvider")
public void testStratificationStatesTestProvider(StratificationStatesTestProvider cfg) {
StratificationStates<ListAsSetOfStates> stratificationStates = new StratificationStates<ListAsSetOfStates>(cfg.getStateSpaceList());
public void testLeafCount(StratificationStatesTestProvider cfg) {
final StratificationStates<ListAsSetOfStates> stratificationStates = new StratificationStates<ListAsSetOfStates>(cfg.getStateSpaceList());
Assert.assertEquals(stratificationStates.getNStates(), cfg.nStates);
@ -138,20 +149,55 @@ public class StratificationStatesUnitTest extends BaseTest {
nLeafs++;
}
Assert.assertEquals(nLeafs, cfg.nStates, "Unexpected number of leaves");
Set<Integer> seenKeys = new HashSet<Integer>(cfg.nStates);
}
/**
 * Checks that no two leaves of the stratification tree share a key.
 */
@Test(dataProvider = "StratificationStatesTestProvider")
public void testKeys(StratificationStatesTestProvider cfg) {
    final StratificationStates<ListAsSetOfStates> stratificationStates =
            new StratificationStates<ListAsSetOfStates>(cfg.getStateSpaceList());
    final Set<Integer> seenKeys = new HashSet<Integer>(cfg.nStates);

    for ( final StratNode<ListAsSetOfStates> node : stratificationStates.getRoot() ) {
        if ( ! node.isLeaf() ) {
            continue; // only leaves have keys to compare
        }
        final int leafKey = node.getKey();
        Assert.assertFalse(seenKeys.contains(leafKey), "Already seen the key");
        seenKeys.add(leafKey);
    }
}
seenKeys.clear();
for ( List<String> state : cfg.getAllCombinations() ) {
/**
 * Checks that every combination of states produced by the provider maps to its own key.
 */
@Test(dataProvider = "StratificationStatesTestProvider")
public void testFindSingleKeys(StratificationStatesTestProvider cfg) {
    final StratificationStates<ListAsSetOfStates> stratificationStates =
            new StratificationStates<ListAsSetOfStates>(cfg.getStateSpaceList());
    final Set<Integer> seenKeys = new HashSet<Integer>(cfg.nStates);

    final Queue<List<Object>> combinations = cfg.getAllCombinations();
    for ( final List<Object> combination : combinations ) {
        final int key = stratificationStates.getKey(combination);
        // a repeated key would mean that two different combinations collide
        Assert.assertFalse(seenKeys.contains(key), "Already saw state mapping to this key");
        seenKeys.add(key);
    }
}
/**
 * Checks multi-state lookups: first that querying with all states finds cfg.nStates keys,
 * then that the combination of each pair of consecutive state combinations finds both of
 * their individual keys and exactly the expected number of keys.
 */
@Test(dataProvider = "StratificationStatesTestProvider")
public void testFindMultipleKeys(StratificationStatesTestProvider cfg) {
    final StratificationStates<ListAsSetOfStates> stratificationStates =
            new StratificationStates<ListAsSetOfStates>(cfg.getStateSpaceList());

    // a query listing every state at every position must reach every key
    final List<List<Object>> states = new ArrayList<List<Object>>(cfg.allStates);
    final Set<Integer> keys = stratificationStates.getKeys(states);
    Assert.assertEquals(keys.size(), cfg.nStates, "Find all states didn't find all of the expected unique keys");

    // pair each combination with the one that follows it in the queue
    final Queue<List<Object>> combinations = cfg.getAllCombinations();
    while ( ! combinations.isEmpty() ) {
        final List<Object> first = combinations.poll();
        final List<Object> second = combinations.peek();
        if ( second == null ) {
            continue; // the last combination has no successor to pair with
        }

        final List<List<Object>> combined = StratificationStates.combineStates(first, second);
        final int nExpectedKeys = Utils.nCombinations(combined);

        final int key1 = stratificationStates.getKey(first);
        final int key2 = stratificationStates.getKey(second);
        final Set<Integer> keysCombined = stratificationStates.getKeys(combined);

        Assert.assertTrue(keysCombined.contains(key1), "couldn't find key in data set");
        Assert.assertTrue(keysCombined.contains(key2), "couldn't find key in data set");
        Assert.assertEquals(keysCombined.size(), nExpectedKeys);
    }
}
}