Lots of new functionality for StratificationStates manager
-- Really working according to unit tests -- A nCombination utils
This commit is contained in:
parent
91c5353c4c
commit
9f1cd0ff66
|
|
@ -27,32 +27,14 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
import java.util.List;
|
||||
|
||||
/**
 * A basic interface for a class to be used with the StratificationStates system
 *
 * The type parameter was previously spelled <code>Object</code>, which declared a type
 * variable that shadowed java.lang.Object rather than naming it.  It is now called T;
 * raw and parameterized uses of this interface are unaffected by the rename.
 *
 * @param <T> the type of the state objects provided by the implementation
 *
 * @author Mark DePristo
 * @since 3/28/12
 */
public interface SetOfStates<T> {
    /**
     * @return a list of all objects states that may be provided by this States provider
     */
    public List<T> getAllStates();
}
|
||||
|
|
|
|||
|
|
@ -24,12 +24,12 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Helper class representing a tree of stratification splits, where leaf nodes
|
||||
|
|
@ -49,35 +49,48 @@ import java.util.Map;
|
|||
* This code allows us to efficiently look up a state key (A=2, B=3) and map it
|
||||
* to a specific key (an integer) that's unique over the tree
|
||||
*
|
||||
* Note the structure of this tree is that the keys are -1 for all internal nodes, and
|
||||
* leafs are the only nodes with meaningful keys. So for a tree with 2N nodes N of these
|
||||
* will be internal, with no keys, and meaningful maps from states -> subtrees. The
|
||||
* other N nodes are leafs, with meaningful keys, empty maps, and null stratification objects
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 3/27/12
|
||||
*/
|
||||
public class StratNode<T extends SetOfStates> implements Iterable<StratNode<T>> {
|
||||
@Invariant({
|
||||
"(isLeaf() && stratifier == null && subnodes.isEmpty()) || (!isLeaf() && stratifier != null && !subnodes.isEmpty())"})
|
||||
class StratNode<T extends SetOfStates> implements Iterable<StratNode<T>> {
|
||||
int key = -1;
|
||||
final T stratifier;
|
||||
final Map<String, StratNode<T>> subnodes;
|
||||
final Map<Object, StratNode<T>> subnodes;
|
||||
|
||||
public StratNode() {
|
||||
protected StratNode() {
|
||||
this.subnodes = Collections.emptyMap();
|
||||
this.stratifier = null;
|
||||
}
|
||||
|
||||
StratNode(final T stratifier, final Map<String, StratNode<T>> subnodes) {
|
||||
protected StratNode(final T stratifier, final Map<Object, StratNode<T>> subnodes) {
|
||||
this.stratifier = stratifier;
|
||||
this.subnodes = subnodes;
|
||||
}
|
||||
|
||||
@Requires("key >= 0")
|
||||
public void setKey(final int key) {
|
||||
if ( ! isLeaf() )
|
||||
throw new ReviewedStingException("Cannot set key of non-leaf node");
|
||||
this.key = key;
|
||||
}
|
||||
|
||||
public int find(final List<String> states, int offset) {
|
||||
@Requires({
|
||||
"states != null",
|
||||
"offset >= 0",
|
||||
"offset <= states.size()"
|
||||
})
|
||||
public int find(final List<Object> states, int offset) {
|
||||
if ( isLeaf() ) // we're here!
|
||||
return key;
|
||||
else {
|
||||
final String state = states.get(offset);
|
||||
final Object state = states.get(offset);
|
||||
StratNode<T> subnode = subnodes.get(state);
|
||||
if ( subnode == null )
|
||||
throw new ReviewedStingException("Couldn't find state for " + state + " at node " + this);
|
||||
|
|
@ -86,6 +99,28 @@ public class StratNode<T extends SetOfStates> implements Iterable<StratNode<T>>
|
|||
}
|
||||
}
|
||||
|
||||
@Requires({
|
||||
"multipleStates != null",
|
||||
"offset >= 0",
|
||||
"offset <= multipleStates.size()",
|
||||
"keys != null",
|
||||
"offset == multipleStates.size() || multipleStates.get(offset) != null"})
|
||||
public void find(final List<List<Object>> multipleStates, final int offset, final HashSet<Integer> keys) {
|
||||
if ( isLeaf() ) // we're here!
|
||||
keys.add(key);
|
||||
else {
|
||||
for ( final Object state : multipleStates.get(offset) ) {
|
||||
// loop over all of the states at this offset
|
||||
final StratNode<T> subnode = subnodes.get(state);
|
||||
if ( subnode == null )
|
||||
throw new ReviewedStingException("Couldn't find state for " + state + " at node " + this);
|
||||
else
|
||||
subnode.find(multipleStates, offset+1, keys);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Ensures("result >= 0")
|
||||
public int getKey() {
|
||||
if ( ! isLeaf() )
|
||||
throw new ReviewedStingException("Cannot get key of non-leaf node");
|
||||
|
|
@ -93,10 +128,11 @@ public class StratNode<T extends SetOfStates> implements Iterable<StratNode<T>>
|
|||
return key;
|
||||
}
|
||||
|
||||
protected Map<String, StratNode<T>> getSubnodes() {
|
||||
protected Map<Object, StratNode<T>> getSubnodes() {
|
||||
return subnodes;
|
||||
}
|
||||
|
||||
@Ensures("result >= 0")
|
||||
public int size() {
|
||||
if ( isLeaf() )
|
||||
return 1;
|
||||
|
|
@ -109,9 +145,19 @@ public class StratNode<T extends SetOfStates> implements Iterable<StratNode<T>>
|
|||
return stratifier;
|
||||
}
|
||||
|
||||
public boolean isLeaf() { return stratifier == null; }
|
||||
/**
|
||||
* @return true if this node is a leaf
|
||||
*/
|
||||
public boolean isLeaf() {
|
||||
return stratifier == null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an iterator over this node and all subnodes including internal and leaf nodes
|
||||
* @return a non-null iterator over this node and all of its subnodes
|
||||
*/
|
||||
@Override
|
||||
@Ensures("result != null")
|
||||
public Iterator<StratNode<T>> iterator() {
|
||||
return new StratNodeIterator<T>(this);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -50,9 +50,9 @@ public class StratificationStates<T extends SetOfStates> {
|
|||
return new StratNode<T>();
|
||||
} else {
|
||||
// we are in the middle of the tree
|
||||
final Collection<String> states = first.getAllStates();
|
||||
final LinkedHashMap<String, StratNode<T>> subNodes = new LinkedHashMap<String, StratNode<T>>(states.size());
|
||||
for ( final String state : states ) {
|
||||
final Collection<Object> states = first.getAllStates();
|
||||
final LinkedHashMap<Object, StratNode<T>> subNodes = new LinkedHashMap<Object, StratNode<T>>(states.size());
|
||||
for ( final Object state : states ) {
|
||||
// have to copy because poll modifies the queue
|
||||
final Queue<T> copy = new LinkedList<T>(strats);
|
||||
subNodes.put(state, buildStratificationTree(copy));
|
||||
|
|
@ -64,19 +64,38 @@ public class StratificationStates<T extends SetOfStates> {
|
|||
public int getNStates() {
|
||||
return root.size();
|
||||
}
|
||||
|
||||
|
||||
public StratNode<T> getRoot() {
|
||||
return root;
|
||||
}
|
||||
|
||||
public int getKey(final List<String> states) {
|
||||
public int getKey(final List<Object> states) {
|
||||
return root.find(states, 0);
|
||||
}
|
||||
|
||||
public Set<Integer> getKeys(final List<List<Object>> allStates) {
|
||||
final HashSet<Integer> keys = new HashSet<Integer>();
|
||||
root.find(allStates, 0, keys);
|
||||
return keys;
|
||||
}
|
||||
|
||||
private void assignKeys(final StratNode<T> root, int key) {
|
||||
for ( final StratNode<T> node : root ) {
|
||||
if ( node.isLeaf() )
|
||||
node.setKey(key++);
|
||||
}
|
||||
}
|
||||
|
||||
public static List<List<Object>> combineStates(final List<Object> first, final List<Object> second) {
|
||||
List<List<Object>> combined = new ArrayList<List<Object>>(first.size());
|
||||
for ( int i = 0; i < first.size(); i++ ) {
|
||||
final Object firstI = first.get(i);
|
||||
final Object secondI = second.get(i);
|
||||
if ( firstI.equals(secondI) )
|
||||
combined.add(Collections.singletonList(firstI));
|
||||
else
|
||||
combined.add(Arrays.asList(firstI, secondI));
|
||||
}
|
||||
return combined;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMProgramRecord;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
|
|
@ -710,4 +711,36 @@ public class Utils {
|
|||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of combinations represented by this collection
|
||||
* of collection of options.
|
||||
*
|
||||
* For example, if this is [[A, B], [C, D], [E, F, G]] returns 2 * 2 * 3 = 12
|
||||
*
|
||||
* @param options the option sets to combine, one collection of alternatives per position; must not be null
|
||||
* @param <T> the element type of the option collections
|
||||
* @return the product of the sizes of all collections in options; 1 when options is an empty array
|
||||
*/
|
||||
@Requires("options != null")
|
||||
public static <T> int nCombinations(final Collection<T>[] options) {
|
||||
int nStates = 1;
|
||||
for ( Collection<T> states : options ) {
|
||||
nStates *= states.size();
|
||||
}
|
||||
return nStates;
|
||||
}
|
||||
|
||||
@Requires("options != null")
|
||||
public static <T> int nCombinations(final List<List<T>> options) {
|
||||
if ( options.isEmpty() )
|
||||
return 0;
|
||||
else {
|
||||
int nStates = 1;
|
||||
for ( Collection<T> states : options ) {
|
||||
nStates *= states.size();
|
||||
}
|
||||
return nStates;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
@ -51,46 +52,58 @@ public class StratificationStatesUnitTest extends BaseTest {
|
|||
// --------------------------------------------------------------------------------
|
||||
|
||||
private class StratificationStatesTestProvider extends TestDataProvider {
|
||||
final List<List<Integer>> allStates;
|
||||
final List<List<Object>> allStates = new ArrayList<List<Object>>();
|
||||
final List<ListAsSetOfStates> asSetOfStates = new ArrayList<ListAsSetOfStates>();
|
||||
final int nStates;
|
||||
|
||||
public StratificationStatesTestProvider(final List<Integer> ... allStates) {
|
||||
super(StratificationStatesTestProvider.class);
|
||||
this.allStates = Arrays.asList(allStates);
|
||||
|
||||
for ( List<Integer> states : allStates ) {
|
||||
this.allStates.add(new ArrayList<Object>(states));
|
||||
}
|
||||
|
||||
int nStates = 1;
|
||||
for ( List<Integer> states : this.allStates ) {
|
||||
nStates *= states.size();
|
||||
for ( List<Object> states : this.allStates ) {
|
||||
asSetOfStates.add(new ListAsSetOfStates(states));
|
||||
}
|
||||
this.nStates = nStates;
|
||||
}
|
||||
// private String getName() {
|
||||
// return String.format("probs=%s expectedRegions=%s", Utils.join(",", probs), Utils.join(",", expectedRegions));
|
||||
// }
|
||||
this.nStates = Utils.nCombinations(allStates);
|
||||
|
||||
setName(getName());
|
||||
}
|
||||
|
||||
private String getName() {
|
||||
StringBuilder b = new StringBuilder();
|
||||
int c = 1;
|
||||
for ( List<Object> state : allStates )
|
||||
b.append(String.format("%d = [%s] ", c++, Utils.join(",", state)));
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
public List<ListAsSetOfStates> getStateSpaceList() {
|
||||
return asSetOfStates;
|
||||
}
|
||||
|
||||
public Queue<List<String>> getAllCombinations() {
|
||||
return getAllCombinations(new LinkedList<List<Integer>>(allStates));
|
||||
public Queue<List<Object>> getAllCombinations() {
|
||||
return getAllCombinations(new LinkedList<List<Object>>(allStates));
|
||||
}
|
||||
|
||||
private Queue<List<String>> getAllCombinations(Queue<List<Integer>> states) {
|
||||
private Queue<List<Object>> getAllCombinations(Queue<List<Object>> states) {
|
||||
if ( states.isEmpty() )
|
||||
return new LinkedList<List<String>>();
|
||||
return new LinkedList<List<Object>>();
|
||||
else {
|
||||
List<Integer> head = states.poll();
|
||||
Queue<List<String>> substates = getAllCombinations(states);
|
||||
Queue<List<String>> newStates = new LinkedList<List<String>>();
|
||||
for ( int e : head) {
|
||||
for ( List<String> state : substates ) {
|
||||
List<String> newState = new LinkedList<String>();
|
||||
newState.add(Integer.toString(e));
|
||||
newState.addAll(state);
|
||||
newStates.add(newState);
|
||||
List<Object> head = states.poll();
|
||||
Queue<List<Object>> substates = getAllCombinations(states);
|
||||
Queue<List<Object>> newStates = new LinkedList<List<Object>>();
|
||||
for ( final Object e : head) {
|
||||
if ( substates.isEmpty() ) {
|
||||
newStates.add(new LinkedList<Object>(Collections.singleton(e)));
|
||||
} else {
|
||||
for ( final List<Object> state : substates ) {
|
||||
List<Object> newState = new LinkedList<Object>();
|
||||
newState.add(e);
|
||||
newState.addAll(state);
|
||||
newStates.add(newState);
|
||||
}
|
||||
}
|
||||
}
|
||||
return newStates;
|
||||
|
|
@ -99,16 +112,14 @@ public class StratificationStatesUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
private class ListAsSetOfStates implements SetOfStates {
|
||||
final List<String> integers;
|
||||
final List<Object> integers;
|
||||
|
||||
private ListAsSetOfStates(final List<Integer> integers) {
|
||||
this.integers = new ArrayList<String>(integers.size());
|
||||
for ( int i : integers )
|
||||
this.integers.add(Integer.toString(i));
|
||||
private ListAsSetOfStates(final List<Object> integers) {
|
||||
this.integers = integers;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<String> getAllStates() {
|
||||
public List<Object> getAllStates() {
|
||||
return integers;
|
||||
}
|
||||
}
|
||||
|
|
@ -127,8 +138,8 @@ public class StratificationStatesUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
@Test(dataProvider = "StratificationStatesTestProvider")
|
||||
public void testStratificationStatesTestProvider(StratificationStatesTestProvider cfg) {
|
||||
StratificationStates<ListAsSetOfStates> stratificationStates = new StratificationStates<ListAsSetOfStates>(cfg.getStateSpaceList());
|
||||
public void testLeafCount(StratificationStatesTestProvider cfg) {
|
||||
final StratificationStates<ListAsSetOfStates> stratificationStates = new StratificationStates<ListAsSetOfStates>(cfg.getStateSpaceList());
|
||||
|
||||
Assert.assertEquals(stratificationStates.getNStates(), cfg.nStates);
|
||||
|
||||
|
|
@ -138,20 +149,55 @@ public class StratificationStatesUnitTest extends BaseTest {
|
|||
nLeafs++;
|
||||
}
|
||||
Assert.assertEquals(nLeafs, cfg.nStates, "Unexpected number of leaves");
|
||||
|
||||
Set<Integer> seenKeys = new HashSet<Integer>(cfg.nStates);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "StratificationStatesTestProvider")
|
||||
public void testKeys(StratificationStatesTestProvider cfg) {
|
||||
final StratificationStates<ListAsSetOfStates> stratificationStates = new StratificationStates<ListAsSetOfStates>(cfg.getStateSpaceList());
|
||||
final Set<Integer> seenKeys = new HashSet<Integer>(cfg.nStates);
|
||||
for ( final StratNode node : stratificationStates.getRoot() ) {
|
||||
if ( node.isLeaf() ) {
|
||||
Assert.assertFalse(seenKeys.contains(node.getKey()), "Already seen the key");
|
||||
seenKeys.add(node.getKey());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
seenKeys.clear();
|
||||
for ( List<String> state : cfg.getAllCombinations() ) {
|
||||
@Test(dataProvider = "StratificationStatesTestProvider")
|
||||
public void testFindSingleKeys(StratificationStatesTestProvider cfg) {
|
||||
final StratificationStates<ListAsSetOfStates> stratificationStates = new StratificationStates<ListAsSetOfStates>(cfg.getStateSpaceList());
|
||||
final Set<Integer> seenKeys = new HashSet<Integer>(cfg.nStates);
|
||||
for ( List<Object> state : cfg.getAllCombinations() ) {
|
||||
final int key = stratificationStates.getKey(state);
|
||||
Assert.assertFalse(seenKeys.contains(key), "Already saw state mapping to this key");
|
||||
seenKeys.add(key);
|
||||
}
|
||||
}
|
||||
|
||||
@Test(dataProvider = "StratificationStatesTestProvider")
|
||||
public void testFindMultipleKeys(StratificationStatesTestProvider cfg) {
|
||||
final StratificationStates<ListAsSetOfStates> stratificationStates = new StratificationStates<ListAsSetOfStates>(cfg.getStateSpaceList());
|
||||
final List<List<Object>> states = new ArrayList<List<Object>>(cfg.allStates);
|
||||
final Set<Integer> keys = stratificationStates.getKeys(states);
|
||||
Assert.assertEquals(keys.size(), cfg.nStates, "Find all states didn't find all of the expected unique keys");
|
||||
|
||||
final Queue<List<Object>> combinations = cfg.getAllCombinations();
|
||||
while ( ! combinations.isEmpty() ) {
|
||||
List<Object> first = combinations.poll();
|
||||
List<Object> second = combinations.peek();
|
||||
if ( second != null ) {
|
||||
List<List<Object>> combined = StratificationStates.combineStates(first, second);
|
||||
int nExpectedKeys = Utils.nCombinations(combined);
|
||||
|
||||
final int key1 = stratificationStates.getKey(first);
|
||||
final int key2 = stratificationStates.getKey(second);
|
||||
final Set<Integer> keysCombined = stratificationStates.getKeys(combined);
|
||||
|
||||
Assert.assertTrue(keysCombined.contains(key1), "couldn't find key in data set");
|
||||
Assert.assertTrue(keysCombined.contains(key2), "couldn't find key in data set");
|
||||
|
||||
Assert.assertEquals(keysCombined.size(), nExpectedKeys);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue