diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index ebd2500fd..f12e5b548 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -269,7 +269,7 @@ public class VariantEvalWalker extends RodWalker implements Tr // Initialize the set of stratifications and evaluations to use stratificationObjects = variantEvalUtils.initializeStratificationObjects(this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE); Set> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE); - for ( VariantStratifier vs : getStratificationObjects() ) { + for ( VariantStratifier vs : stratificationObjects ) { if ( vs.getName().equals("Filter") ) byFilterIsEnabled = true; else if ( vs.getName().equals("Sample") ) @@ -301,11 +301,12 @@ public class VariantEvalWalker extends RodWalker implements Tr } } - // initialize CNVs if ( knownCNVsFile != null ) { knownCNVsByContig = createIntervalTreeByContig(knownCNVsFile); } + + //createStratificationStates(stratificationObjects); } public final Map> createIntervalTreeByContig(final IntervalBinding intervals) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java index b2b6d4165..3f8c32b5c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java @@ -6,7 +6,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; 
-import java.util.Arrays; import java.util.List; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java index 1b9513b9a..361cc5fea 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/SetOfStates.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/SetOfStates.java new file mode 100644 index 000000000..564aeaef3 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/SetOfStates.java @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
+
+import java.util.List;
+
+/**
+ * Something that can enumerate, as strings, every state it may take.
+ *
+ * {@link StratificationStates} consumes a list of these objects and creates
+ * one tree level per object, with one branch for each string returned by
+ * {@link #getAllStates()}.
+ */
+public interface SetOfStates {
+    /**
+     * @return all of the states of this object; when a list of states is
+     *         mapped to a key, each state is looked up by its exact string value
+     */
+    public List<String> getAllStates();
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/StratNode.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/StratNode.java
new file mode 100644
index 000000000..1a7e2dde7
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/StratNode.java
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
+
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Helper class representing a tree of stratification splits, where leaf nodes
+ * are given a unique integer key starting at 0 and incrementing up to the
+ * number of leaves in the tree. This allows you to use this tree to produce
+ * a key to map into an array index mapped data structure.
+ *
+ * Suppose I have two strats, each with two values: A = 1, 2 and B = 3, 4
+ *
+ * This data structure creates a tree such as:
+ *
+ * root -> A -> 1 -> B -> 3 : 0
+ *                   |- B -> 4 : 1
+ *      |- A -> 2 -> B -> 3 : 2
+ *                   |- B -> 4 : 3
+ *
+ * This code allows us to efficiently look up a state key (A=2, B=3) and map it
+ * to a specific key (an integer) that's unique over the tree
+ *
+ * @author Mark DePristo
+ * @since 3/27/12
+ */
+public class StratNode<T extends SetOfStates> implements Iterable<StratNode<T>> {
+    /** Key of this node; stays -1 until {@link #setKey(int)} is called on a leaf. */
+    int key = -1;
+
+    /** The stratifier split at this node, or null when this node is a leaf. */
+    final T stratifier;
+
+    /** Child nodes indexed by state string; empty for a leaf. */
+    final Map<String, StratNode<T>> subnodes;
+
+    /**
+     * Creates a leaf node: no stratifier and no subnodes.
+     */
+    public StratNode() {
+        this.subnodes = Collections.emptyMap();
+        this.stratifier = null;
+    }
+
+    /**
+     * Creates an interior node.
+     *
+     * @param stratifier the stratifier split at this node
+     * @param subnodes   the child node for each state string
+     */
+    StratNode(final T stratifier, final Map<String, StratNode<T>> subnodes) {
+        this.stratifier = stratifier;
+        this.subnodes = subnodes;
+    }
+
+    /**
+     * Sets the key of this leaf.
+     *
+     * @param key the key to store
+     * @throws ReviewedStingException if this node is not a leaf
+     */
+    public void setKey(final int key) {
+        if ( ! isLeaf() )
+            throw new ReviewedStingException("Cannot set key of non-leaf node");
+        this.key = key;
+    }
+
+    /**
+     * Walks down the tree following states.get(offset), states.get(offset+1), ...
+     * and returns the key of the leaf that is reached.  Elements of states beyond
+     * the depth of the tree are ignored.
+     *
+     * @param states the state strings, one per tree level
+     * @param offset index into states of the state belonging to this node
+     * @return the key of the leaf reached (-1 if that leaf never had its key set)
+     * @throws ReviewedStingException if states runs out before a leaf is reached, or
+     *                                if a state has no matching subnode
+     */
+    public int find(final List<String> states, int offset) {
+        if ( isLeaf() ) // we're here!
+            return key;
+
+        // report a short state list the same way as an unknown state, rather than
+        // letting List.get fail with a bare IndexOutOfBoundsException
+        if ( offset >= states.size() )
+            throw new ReviewedStingException("Too few states: no state at offset " + offset + " of " + states + " at node " + this);
+
+        final String state = states.get(offset);
+        final StratNode<T> subnode = subnodes.get(state);
+        if ( subnode == null )
+            throw new ReviewedStingException("Couldn't find state for " + state + " at node " + this);
+        return subnode.find(states, offset+1);
+    }
+
+    /**
+     * @return the key of this leaf
+     * @throws ReviewedStingException if this node is not a leaf
+     */
+    public int getKey() {
+        if ( ! isLeaf() )
+            throw new ReviewedStingException("Cannot get key of non-leaf node");
+        return key;
+    }
+
+    /**
+     * @return the map from state string to child node (the live map, not a copy)
+     */
+    protected Map<String, StratNode<T>> getSubnodes() {
+        return subnodes;
+    }
+
+    /**
+     * Computes the number of leaves below this node as
+     * (leaves under the first subnode) * (number of subnodes).
+     *
+     * This relies on every subnode having the same number of leaves, which holds
+     * for trees where all siblings are built from the same remaining stratifiers.
+     *
+     * @return 1 for a leaf, 0 for an interior node without subnodes, otherwise
+     *         the product described above
+     */
+    public int size() {
+        if ( isLeaf() )
+            return 1;
+
+        // a stratifier with no states has no leaves beneath it; without this guard
+        // iterator().next() below would throw NoSuchElementException
+        if ( subnodes.isEmpty() )
+            return 0;
+
+        return subnodes.values().iterator().next().size() * subnodes.size();
+    }
+
+    /**
+     * @return the stratifier split at this node, or null for a leaf
+     */
+    public T getSetOfStates() {
+        return stratifier;
+    }
+
+    /**
+     * @return true if this node has no stratifier
+     */
+    public boolean isLeaf() { return stratifier == null; }
+
+    /**
+     * @return an iterator over this node and every node below it
+     */
+    @Override
+    public Iterator<StratNode<T>> iterator() {
+        return new StratNodeIterator<T>(this);
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/StratNodeIterator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/StratNodeIterator.java
new file mode 100644
index 000000000..17aa88387
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/StratNodeIterator.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
+
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.util.*;
+
+/**
+ * Helper class for creating iterators over all nodes in the stratification tree
+ *
+ * The root is returned first, followed by the complete contents of each subtree,
+ * taken in the order of root.subnodes.values().
+ *
+ * @author Mark DePristo
+ * @since 3/27/12
+ */
+class StratNodeIterator<T extends SetOfStates> implements Iterator<StratNode<T>> {
+    /** One pending iterator per subtree of the root, in subnode order. */
+    final Queue<Iterator<StratNode<T>>> iterators = new ArrayDeque<Iterator<StratNode<T>>>();
+
+    /** The iterator currently being drained; starts as a one-element iterator over the root. */
+    Iterator<StratNode<T>> currentIterator;
+
+    /**
+     * @param root the node to start from; an iterator for each of its subnodes is
+     *             created immediately
+     */
+    StratNodeIterator(final StratNode<T> root) {
+        currentIterator = Collections.singleton(root).iterator();
+        for ( final StratNode<T> subNode : root.subnodes.values() )
+            iterators.add(new StratNodeIterator<T>(subNode));
+    }
+
+    @Override
+    public boolean hasNext() {
+        // every queued iterator yields at least its own root, so a non-empty
+        // queue always means another element is available
+        return currentIterator.hasNext() || ! iterators.isEmpty();
+    }
+
+    /**
+     * @return the next node of the traversal
+     * @throws NoSuchElementException if the traversal is exhausted, as the
+     *                                Iterator contract requires
+     */
+    @Override
+    public StratNode<T> next() {
+        // advance to the next pending subtree iterator once the current one is drained
+        while ( ! currentIterator.hasNext() ) {
+            if ( iterators.isEmpty() )
+                throw new NoSuchElementException("Next called on empty iterator");
+            currentIterator = iterators.poll();
+        }
+        return currentIterator.next();
+    }
+
+    /**
+     * Removal is not supported.
+     *
+     * @throws ReviewedStingException always
+     */
+    @Override
+    public void remove() {
+        throw new ReviewedStingException("Cannot remove from StratNode iterator");
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/StratificationStates.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/StratificationStates.java
new file mode 100644
index 000000000..7f1c75fa9
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/StratificationStates.java
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
+
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.util.*;
+
+/**
+ * Represents the full state space of all stratification combinations
+ *
+ * The stratifications are arranged as a tree with one level per stratification,
+ * in list order, and one branch per state.  Each leaf is given an integer key,
+ * counting up from 0 in tree iteration order.
+ *
+ * @author Mark DePristo
+ * @since 3/27/12
+ */
+public class StratificationStates<T extends SetOfStates> {
+    private final StratNode<T> root;
+
+    /**
+     * Builds the stratification tree and assigns a key to every leaf.
+     *
+     * @param strats the stratifications, outermost tree level first; an empty
+     *               list produces a single leaf with key 0
+     * @throws ReviewedStingException if strats contains a null element
+     */
+    public StratificationStates(final List<T> strats) {
+        this.root = buildStratificationTree(strats, 0);
+
+        assignKeys(root);
+    }
+
+    /**
+     * Recursively builds the subtree for strats.get(depth) and everything after it.
+     * Walking the list by index avoids copying the remaining stratifications once
+     * per state at every level.
+     *
+     * @param strats all stratifications
+     * @param depth  index of the stratification split at the node being built
+     * @return a leaf when depth is past the last stratification, else an interior node
+     */
+    private StratNode<T> buildStratificationTree(final List<T> strats, final int depth) {
+        if ( depth == strats.size() ) {
+            // we are at a leaf
+            return new StratNode<T>();
+        }
+
+        // we are in the middle of the tree
+        final T current = strats.get(depth);
+        if ( current == null )
+            // a null entry would otherwise be indistinguishable from the end of the list
+            throw new ReviewedStingException("Stratification at position " + depth + " is null");
+
+        final Collection<String> states = current.getAllStates();
+        // LinkedHashMap keeps the subnodes in the order the states were listed
+        final LinkedHashMap<String, StratNode<T>> subNodes = new LinkedHashMap<String, StratNode<T>>(states.size());
+        for ( final String state : states )
+            subNodes.put(state, buildStratificationTree(strats, depth + 1));
+        return new StratNode<T>(current, subNodes);
+    }
+
+    /**
+     * @return the number of leaves in the tree, as reported by the root's size()
+     */
+    public int getNStates() {
+        return root.size();
+    }
+
+    /**
+     * @return the root node of the stratification tree
+     */
+    public StratNode<T> getRoot() {
+        return root;
+    }
+
+    /**
+     * Maps one state per stratification onto the key of the matching leaf.
+     *
+     * @param states the state strings, in the same order as the stratifications
+     *               given to the constructor
+     * @return the key of the leaf reached by following states from the root
+     */
+    public int getKey(final List<String> states) {
+        return root.find(states, 0);
+    }
+
+    /**
+     * Gives each leaf below root, in iteration order, the next integer starting at 0.
+     *
+     * @param root the node whose leaves are to be numbered
+     */
+    private void assignKeys(final StratNode<T> root) {
+        int key = 0;
+        for ( final StratNode<T> node : root ) {
+            if ( node.isLeaf() )
+                node.setKey(key++);
+        }
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java
index 119a1b83f..42d92ec01 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java
@@ -6,7 +6,6 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 
 public abstract class VariantStratifier implements Comparable {
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/StratificationStatesUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/StratificationStatesUnitTest.java
new file mode 100644
index 000000000..946aef4a9
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/StratificationStatesUnitTest.java
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// our package
+package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
+
+
+// the imports for unit testing.
+
+
+import org.broadinstitute.sting.BaseTest;
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.FileNotFoundException;
+import java.util.*;
+
+
+/**
+ * Checks that StratificationStates produces one uniquely keyed leaf for every
+ * combination of states, and that every combination maps to a distinct key.
+ */
+public class StratificationStatesUnitTest extends BaseTest {
+    @BeforeClass
+    public void init() throws FileNotFoundException {
+    }
+
+    // --------------------------------------------------------------------------------
+    //
+    // Basic tests Provider
+    //
+    // --------------------------------------------------------------------------------
+
+    /**
+     * One test case: a list of stratifications, each given as the list of its
+     * integer states, plus the expected size of the combined state space.
+     */
+    private class StratificationStatesTestProvider extends TestDataProvider {
+        final List<List<Integer>> allStates;
+        final List<SetOfStates> asSetOfStates = new ArrayList<SetOfStates>();
+        /** Product of the sizes of all state lists. */
+        final int nStates;
+
+        public StratificationStatesTestProvider(final List<Integer> ... allStates) {
+            super(StratificationStatesTestProvider.class);
+            this.allStates = Arrays.asList(allStates);
+
+            int nStates = 1;
+            for ( List<Integer> states : this.allStates ) {
+                nStates *= states.size();
+                asSetOfStates.add(new ListAsSetOfStates(states));
+            }
+            this.nStates = nStates;
+        }
+
+        public List<SetOfStates> getStateSpaceList() {
+            return asSetOfStates;
+        }
+
+        /**
+         * @return every combination of one state per stratification, each as a
+         *         list of state strings in stratification order
+         */
+        public Queue<List<String>> getAllCombinations() {
+            return getAllCombinations(new LinkedList<List<Integer>>(allStates));
+        }
+
+        private Queue<List<String>> getAllCombinations(Queue<List<Integer>> states) {
+            if ( states.isEmpty() ) {
+                // There is exactly one way to choose states for zero stratifications:
+                // the empty list.  Returning an empty queue here would make every
+                // caller up the recursion produce no combinations at all.
+                final Queue<List<String>> base = new LinkedList<List<String>>();
+                base.add(new LinkedList<String>());
+                return base;
+            } else {
+                List<Integer> head = states.poll();
+                Queue<List<String>> substates = getAllCombinations(states);
+                Queue<List<String>> newStates = new LinkedList<List<String>>();
+                for ( int e : head) {
+                    for ( List<String> state : substates ) {
+                        List<String> newState = new LinkedList<String>();
+                        newState.add(Integer.toString(e));
+                        newState.addAll(state);
+                        newStates.add(newState);
+                    }
+                }
+                return newStates;
+            }
+        }
+    }
+
+    /** Adapts a list of integers to SetOfStates by exposing each integer as a string. */
+    private class ListAsSetOfStates implements SetOfStates {
+        final List<String> integers;
+
+        private ListAsSetOfStates(final List<Integer> integers) {
+            this.integers = new ArrayList<String>(integers.size());
+            for ( int i : integers )
+                this.integers.add(Integer.toString(i));
+        }
+
+        @Override
+        public List<String> getAllStates() {
+            return integers;
+        }
+    }
+
+    @DataProvider(name = "StratificationStatesTestProvider")
+    public Object[][] makeStratificationStatesTestProvider() {
+        new StratificationStatesTestProvider(Arrays.asList(0));
+        new StratificationStatesTestProvider(Arrays.asList(0, 1));
+        new StratificationStatesTestProvider(Arrays.asList(0, 1), Arrays.asList(2, 3));
+        new StratificationStatesTestProvider(Arrays.asList(0, 1), Arrays.asList(2, 3), Arrays.asList(4, 5));
+        new StratificationStatesTestProvider(Arrays.asList(0, 1), Arrays.asList(2, 3, 4), Arrays.asList(5, 6));
+        new StratificationStatesTestProvider(Arrays.asList(0, 1), Arrays.asList(2, 3, 4, 5), Arrays.asList(6));
+        new StratificationStatesTestProvider(Arrays.asList(0, 1), Arrays.asList(2, 3, 4, 5), Arrays.asList(6, 7));
+        new StratificationStatesTestProvider(Arrays.asList(0, 1), Arrays.asList(2, 3), Arrays.asList(4, 5), Arrays.asList(6, 7));
+        return StratificationStatesTestProvider.getTests(StratificationStatesTestProvider.class);
+    }
+
+    @Test(dataProvider = "StratificationStatesTestProvider")
+    public void testStratificationStatesTestProvider(StratificationStatesTestProvider cfg) {
+        StratificationStates<SetOfStates> stratificationStates = new StratificationStates<SetOfStates>(cfg.getStateSpaceList());
+
+        Assert.assertEquals(stratificationStates.getNStates(), cfg.nStates);
+
+        int nLeafs = 0;
+        for ( final StratNode<SetOfStates> node : stratificationStates.getRoot() ) {
+            if ( node.isLeaf() )
+                nLeafs++;
+        }
+        Assert.assertEquals(nLeafs, cfg.nStates, "Unexpected number of leaves");
+
+        Set<Integer> seenKeys = new HashSet<Integer>(cfg.nStates);
+        for ( final StratNode<SetOfStates> node : stratificationStates.getRoot() ) {
+            if ( node.isLeaf() ) {
+                Assert.assertFalse(seenKeys.contains(node.getKey()), "Already seen the key");
+                seenKeys.add(node.getKey());
+            }
+        }
+
+        // guard against the combination generator silently producing nothing, which
+        // would turn the state -> key check below into a no-op
+        final Queue<List<String>> combinations = cfg.getAllCombinations();
+        Assert.assertEquals(combinations.size(), cfg.nStates, "Unexpected number of state combinations");
+
+        seenKeys.clear();
+        for ( List<String> state : combinations ) {
+            final int key = stratificationStates.getKey(state);
+            Assert.assertTrue(key >= 0 && key < cfg.nStates, "Key out of range: " + key);
+            Assert.assertFalse(seenKeys.contains(key), "Already saw state mapping to this key");
+            seenKeys.add(key);
+        }
+    }
+}
\ No newline at end of file