Memory usage optimizations and safety improvements to StratNode and StratificationManager
-- Added memory and safety optimizations to StratNode and StratificationManager. Fresh, immutable HashMaps are allocated for the final data structures, so they are exactly the correct size and cannot be changed by users. -- Added the ability for a stratification to declare the evaluators it is incompatible with. The two stratifications using this are AlleleCount (AC) and Sample, which are both incompatible with VariantSummary: that evaluator computes per-sample averages, so combining it with either stratification results in an O(n^2) memory requirement. Added an integration test to cover incompatible strats and evals.
This commit is contained in:
parent
b335c22f6d
commit
097ed4ecc4
|
|
@ -269,7 +269,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
// Initialize the set of stratifications and evaluations to use
|
||||
// The list of stratifiers and evaluators to use
|
||||
final List<VariantStratifier> stratificationObjects = variantEvalUtils.initializeStratificationObjects(NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
|
||||
final Set<Class<? extends VariantEvaluator>> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
|
||||
final Set<Class<? extends VariantEvaluator>> evaluationClasses = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
|
||||
|
||||
checkForIncompatibleEvaluatorsAndStratifiers(stratificationObjects, evaluationClasses);
|
||||
|
||||
for ( VariantStratifier vs : stratificationObjects ) {
|
||||
if ( vs.getName().equals("Filter") )
|
||||
|
|
@ -289,10 +291,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
}
|
||||
|
||||
// Initialize the evaluation contexts
|
||||
createStratificationStates(stratificationObjects, evaluationObjects);
|
||||
createStratificationStates(stratificationObjects, evaluationClasses);
|
||||
|
||||
// Initialize report table
|
||||
report = variantEvalUtils.initializeGATKReport(stratificationObjects, evaluationObjects);
|
||||
report = variantEvalUtils.initializeGATKReport(stratificationObjects, evaluationClasses);
|
||||
|
||||
// Load ancestral alignments
|
||||
if (ancestralAlignmentsFile != null) {
|
||||
|
|
@ -309,6 +311,19 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Verifies that none of the requested evaluators is declared incompatible by any of the
 * requested stratifications.
 *
 * Every stratifier is asked for its set of incompatible evaluator classes via
 * getIncompatibleEvaluators(); if any requested evaluator class is in that set the
 * method fails immediately on the first offending pair.
 *
 * @param stratificationObjects the stratifications selected for this run
 * @param evaluationClasses the evaluator classes selected for this run
 * @throws UserException.BadArgumentValue naming the first incompatible stratification / evaluator pair found
 */
final void checkForIncompatibleEvaluatorsAndStratifiers( final List<VariantStratifier> stratificationObjects,
|
||||
Set<Class<? extends VariantEvaluator>> evaluationClasses) {
|
||||
for ( final VariantStratifier vs : stratificationObjects ) {
|
||||
for ( Class<? extends VariantEvaluator> ec : evaluationClasses )
|
||||
if ( vs.getIncompatibleEvaluators().contains(ec) )
|
||||
// fail fast: report the first bad pair rather than collecting all of them
throw new UserException.BadArgumentValue("ST and ET",
|
||||
"The selected stratification " + vs.getName() +
|
||||
" and evaluator " + ec.getSimpleName() +
|
||||
" are incompatible due to combinatorial memory requirements." +
|
||||
" Please disable one");
|
||||
}
|
||||
}
|
||||
|
||||
final void createStratificationStates(final List<VariantStratifier> stratificationObjects, final Set<Class<? extends VariantEvaluator>> evaluationObjects) {
|
||||
final List<VariantStratifier> strats = new ArrayList<VariantStratifier>(stratificationObjects);
|
||||
stratManager = new StratificationManager<VariantStratifier, EvaluationContext>(strats);
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantSummary;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Stratifies the eval RODs by the allele count of the alternate allele
|
||||
|
|
@ -50,9 +50,14 @@ public class AlleleCount extends VariantStratifier {
|
|||
} else
|
||||
// by default, the site is considered monomorphic
|
||||
AC = 0;
|
||||
return Collections.singletonList((Object)AC);
|
||||
return Collections.singletonList((Object) AC);
|
||||
} else {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Declares the VariantSummary evaluator as incompatible with the AlleleCount stratification.
 *
 * VariantSummary computes per-sample metrics, so combining it with this stratification
 * leads to an O(n^2) memory requirement.
 *
 * @return a newly allocated set containing only VariantSummary.class
 */
@Override
|
||||
public Set<Class<? extends VariantEvaluator>> getIncompatibleEvaluators() {
|
||||
return new HashSet<Class<? extends VariantEvaluator>>(Arrays.asList(VariantSummary.class));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,11 +2,11 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantSummary;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Stratifies the eval RODs by each sample in the eval ROD.
|
||||
|
|
@ -22,6 +22,11 @@ public class Sample extends VariantStratifier {
|
|||
}
|
||||
|
||||
public List<Object> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
return Collections.singletonList((Object)sampleName);
|
||||
return Collections.singletonList((Object) sampleName);
|
||||
}
|
||||
|
||||
/**
 * Declares the VariantSummary evaluator as incompatible with the Sample stratification.
 *
 * VariantSummary computes per-sample metrics, so combining it with a per-sample
 * stratification leads to an O(n^2) memory requirement.
 *
 * @return a newly allocated set containing only VariantSummary.class
 */
@Override
|
||||
public Set<Class<? extends VariantEvaluator>> getIncompatibleEvaluators() {
|
||||
return new HashSet<Class<? extends VariantEvaluator>>(Arrays.asList(VariantSummary.class));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,11 +3,14 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manager.Stratifier;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
public abstract class VariantStratifier implements Comparable<VariantStratifier>, Stratifier {
|
||||
private VariantEvalWalker variantEvalWalker;
|
||||
|
|
@ -65,4 +68,16 @@ public abstract class VariantStratifier implements Comparable<VariantStratifier>
|
|||
/**
 * Returns the states held by this stratifier.
 *
 * NOTE(review): the internal list itself is returned, not a copy, so callers that
 * modify it modify this stratifier's state -- confirm whether that is intended.
 *
 * @return the list stored in the states field
 */
public final ArrayList<Object> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Lets a stratifier declare the evaluators it must not be combined with. For
|
||||
* example, VariantSummary includes a per-sample metric, and so cannot be used safely with the Sample
|
||||
* or AlleleCount stratifications, as this introduces an O(n^2) memory and CPU cost.
|
||||
*
* This default implementation returns an empty set, i.e. it declares no incompatible
* evaluators; stratifiers with restrictions override it.
*
|
||||
* @return the set of VariantEvaluator classes that cannot be active together with this stratification
|
||||
*/
|
||||
public Set<Class<? extends VariantEvaluator>> getIncompatibleEvaluators() {
|
||||
return Collections.emptySet();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -62,8 +62,7 @@ import java.util.*;
|
|||
class StratNode<T extends Stratifier> implements Iterable<StratNode<T>> {
|
||||
int key = -1;
|
||||
final T stratifier;
|
||||
// TODO -- track state key that maps to root node
|
||||
final Map<Object, StratNode<T>> subnodes;
|
||||
final Map<Object, StratNode<T>> subnodes; // NOTE, because we don't iterate our best option is a HashMap
|
||||
|
||||
protected StratNode() {
|
||||
this.subnodes = Collections.emptyMap();
|
||||
|
|
@ -72,7 +71,8 @@ class StratNode<T extends Stratifier> implements Iterable<StratNode<T>> {
|
|||
|
||||
protected StratNode(final T stratifier, final Map<Object, StratNode<T>> subnodes) {
|
||||
this.stratifier = stratifier;
|
||||
this.subnodes = subnodes;
|
||||
// important to reallocate an unmodifiable HashMap of exactly this size, for space and safety
|
||||
this.subnodes = Collections.unmodifiableMap(new HashMap<Object, StratNode<T>>(subnodes));
|
||||
}
|
||||
|
||||
@Requires("key >= 0")
|
||||
|
|
|
|||
|
|
@ -118,7 +118,7 @@ public class StratificationManager<K extends Stratifier, V> implements Map<List<
|
|||
if ( node.isLeaf() ) { // we're here!
|
||||
if ( states.isEmpty() )
|
||||
throw new ReviewedStingException("Found a leaf node with an empty state values vector");
|
||||
stratifierValuesByKey.set(node.getKey(), new ArrayList<Object>(states));
|
||||
stratifierValuesByKey.set(node.getKey(), Collections.unmodifiableList(new ArrayList<Object>(states)));
|
||||
} else {
|
||||
for ( Map.Entry<Object, StratNode<K>> entry : node.getSubnodes().entrySet() ) {
|
||||
final LinkedList<Object> newStates = new LinkedList<Object>(states);
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
@ -491,4 +492,18 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
);
|
||||
executeTest("testModernVCFWithLargeIndels", spec);
|
||||
}
|
||||
|
||||
/**
 * Integration test for the stratification / evaluator incompatibility check.
 *
 * Runs VariantEval on chromosome 20 with the AlleleCount stratification and the
 * VariantSummary evaluator requested together, and builds the spec with
 * UserException.class as the expected exception.
 *
 * NOTE(review): presumably -noST / -noEV disable the standard stratifications and
 * evaluators so that only the named pair is active, and the 0 is the expected
 * number of outputs -- confirm against WalkerTestSpec.
 */
@Test()
|
||||
public void testIncompatibleEvalAndStrat() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T VariantEval",
|
||||
"-R " + b37KGReference,
|
||||
"-eval " + validationDataLocation + "/NA12878.HiSeq.WGS.b37_decoy.indel.recalibrated.vcf",
|
||||
"-L 20 -noST -ST AlleleCount -noEV -EV VariantSummary"
|
||||
),
|
||||
0,
|
||||
UserException.class);
|
||||
executeTest("testIncompatibleEvalAndStrat", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue