Memory usage optimizations and safety improvements to StratNode and StratificationManager
-- Added memory and safety optimizations to StratNode and StratificationManager. Fresh, immutable HashMaps are allocated for final data structures, so they are exactly the correct size and cannot be changed by users. -- Added the ability for a stratification to specify incompatible evaluations. The two strats using this are AC and Sample with VariantSummary, as this computes per-sample averages and so combining these results in an O(n^2) memory requirement. Added an integration test to cover incompatible strats and evals.
This commit is contained in:
parent
b335c22f6d
commit
097ed4ecc4
|
|
@ -269,7 +269,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
// Initialize the set of stratifications and evaluations to use
|
// Initialize the set of stratifications and evaluations to use
|
||||||
// The list of stratifiers and evaluators to use
|
// The list of stratifiers and evaluators to use
|
||||||
final List<VariantStratifier> stratificationObjects = variantEvalUtils.initializeStratificationObjects(NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
|
final List<VariantStratifier> stratificationObjects = variantEvalUtils.initializeStratificationObjects(NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
|
||||||
final Set<Class<? extends VariantEvaluator>> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
|
final Set<Class<? extends VariantEvaluator>> evaluationClasses = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
|
||||||
|
|
||||||
|
checkForIncompatibleEvaluatorsAndStratifiers(stratificationObjects, evaluationClasses);
|
||||||
|
|
||||||
for ( VariantStratifier vs : stratificationObjects ) {
|
for ( VariantStratifier vs : stratificationObjects ) {
|
||||||
if ( vs.getName().equals("Filter") )
|
if ( vs.getName().equals("Filter") )
|
||||||
|
|
@ -289,10 +291,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize the evaluation contexts
|
// Initialize the evaluation contexts
|
||||||
createStratificationStates(stratificationObjects, evaluationObjects);
|
createStratificationStates(stratificationObjects, evaluationClasses);
|
||||||
|
|
||||||
// Initialize report table
|
// Initialize report table
|
||||||
report = variantEvalUtils.initializeGATKReport(stratificationObjects, evaluationObjects);
|
report = variantEvalUtils.initializeGATKReport(stratificationObjects, evaluationClasses);
|
||||||
|
|
||||||
// Load ancestral alignments
|
// Load ancestral alignments
|
||||||
if (ancestralAlignmentsFile != null) {
|
if (ancestralAlignmentsFile != null) {
|
||||||
|
|
@ -309,6 +311,19 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final void checkForIncompatibleEvaluatorsAndStratifiers( final List<VariantStratifier> stratificationObjects,
|
||||||
|
Set<Class<? extends VariantEvaluator>> evaluationClasses) {
|
||||||
|
for ( final VariantStratifier vs : stratificationObjects ) {
|
||||||
|
for ( Class<? extends VariantEvaluator> ec : evaluationClasses )
|
||||||
|
if ( vs.getIncompatibleEvaluators().contains(ec) )
|
||||||
|
throw new UserException.BadArgumentValue("ST and ET",
|
||||||
|
"The selected stratification " + vs.getName() +
|
||||||
|
" and evaluator " + ec.getSimpleName() +
|
||||||
|
" are incompatible due to combinatorial memory requirements." +
|
||||||
|
" Please disable one");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
final void createStratificationStates(final List<VariantStratifier> stratificationObjects, final Set<Class<? extends VariantEvaluator>> evaluationObjects) {
|
final void createStratificationStates(final List<VariantStratifier> stratificationObjects, final Set<Class<? extends VariantEvaluator>> evaluationObjects) {
|
||||||
final List<VariantStratifier> strats = new ArrayList<VariantStratifier>(stratificationObjects);
|
final List<VariantStratifier> strats = new ArrayList<VariantStratifier>(stratificationObjects);
|
||||||
stratManager = new StratificationManager<VariantStratifier, EvaluationContext>(strats);
|
stratManager = new StratificationManager<VariantStratifier, EvaluationContext>(strats);
|
||||||
|
|
|
||||||
|
|
@ -3,13 +3,13 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
||||||
import org.broadinstitute.sting.commandline.RodBinding;
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantSummary;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stratifies the eval RODs by the allele count of the alternate allele
|
* Stratifies the eval RODs by the allele count of the alternate allele
|
||||||
|
|
@ -50,9 +50,14 @@ public class AlleleCount extends VariantStratifier {
|
||||||
} else
|
} else
|
||||||
// by default, the site is considered monomorphic
|
// by default, the site is considered monomorphic
|
||||||
AC = 0;
|
AC = 0;
|
||||||
return Collections.singletonList((Object)AC);
|
return Collections.singletonList((Object) AC);
|
||||||
} else {
|
} else {
|
||||||
return Collections.emptyList();
|
return Collections.emptyList();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<Class<? extends VariantEvaluator>> getIncompatibleEvaluators() {
|
||||||
|
return new HashSet<Class<? extends VariantEvaluator>>(Arrays.asList(VariantSummary.class));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,11 +2,11 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantSummary;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.*;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stratifies the eval RODs by each sample in the eval ROD.
|
* Stratifies the eval RODs by each sample in the eval ROD.
|
||||||
|
|
@ -22,6 +22,11 @@ public class Sample extends VariantStratifier {
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Object> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
public List<Object> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||||
return Collections.singletonList((Object)sampleName);
|
return Collections.singletonList((Object) sampleName);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<Class<? extends VariantEvaluator>> getIncompatibleEvaluators() {
|
||||||
|
return new HashSet<Class<? extends VariantEvaluator>>(Arrays.asList(VariantSummary.class));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,11 +3,14 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
|
||||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manager.Stratifier;
|
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manager.Stratifier;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
public abstract class VariantStratifier implements Comparable<VariantStratifier>, Stratifier {
|
public abstract class VariantStratifier implements Comparable<VariantStratifier>, Stratifier {
|
||||||
private VariantEvalWalker variantEvalWalker;
|
private VariantEvalWalker variantEvalWalker;
|
||||||
|
|
@ -65,4 +68,16 @@ public abstract class VariantStratifier implements Comparable<VariantStratifier>
|
||||||
public final ArrayList<Object> getAllStates() {
|
public final ArrayList<Object> getAllStates() {
|
||||||
return states;
|
return states;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The way for a stratifier to specify that it's incompatible with specific evaluations. For
|
||||||
|
* example, VariantSummary includes a per-sample metric, and so cannot be used safely with Sample
|
||||||
|
* or AlleleCount stratifications as this introduces an O(n^2) memory and cpu cost.
|
||||||
|
*
|
||||||
|
* @return the set of VariantEvaluators that cannot be active with this Stratification
|
||||||
|
*/
|
||||||
|
public Set<Class<? extends VariantEvaluator>> getIncompatibleEvaluators() {
|
||||||
|
return Collections.emptySet();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -62,8 +62,7 @@ import java.util.*;
|
||||||
class StratNode<T extends Stratifier> implements Iterable<StratNode<T>> {
|
class StratNode<T extends Stratifier> implements Iterable<StratNode<T>> {
|
||||||
int key = -1;
|
int key = -1;
|
||||||
final T stratifier;
|
final T stratifier;
|
||||||
// TODO -- track state key that maps to root node
|
final Map<Object, StratNode<T>> subnodes; // NOTE, because we don't iterate our best option is a HashMap
|
||||||
final Map<Object, StratNode<T>> subnodes;
|
|
||||||
|
|
||||||
protected StratNode() {
|
protected StratNode() {
|
||||||
this.subnodes = Collections.emptyMap();
|
this.subnodes = Collections.emptyMap();
|
||||||
|
|
@ -72,7 +71,8 @@ class StratNode<T extends Stratifier> implements Iterable<StratNode<T>> {
|
||||||
|
|
||||||
protected StratNode(final T stratifier, final Map<Object, StratNode<T>> subnodes) {
|
protected StratNode(final T stratifier, final Map<Object, StratNode<T>> subnodes) {
|
||||||
this.stratifier = stratifier;
|
this.stratifier = stratifier;
|
||||||
this.subnodes = subnodes;
|
// important to reallocate an unmodifiable hashmap with this specific size for space and safety
|
||||||
|
this.subnodes = Collections.unmodifiableMap(new HashMap<Object, StratNode<T>>(subnodes));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires("key >= 0")
|
@Requires("key >= 0")
|
||||||
|
|
|
||||||
|
|
@ -118,7 +118,7 @@ public class StratificationManager<K extends Stratifier, V> implements Map<List<
|
||||||
if ( node.isLeaf() ) { // we're here!
|
if ( node.isLeaf() ) { // we're here!
|
||||||
if ( states.isEmpty() )
|
if ( states.isEmpty() )
|
||||||
throw new ReviewedStingException("Found a leaf node with an empty state values vector");
|
throw new ReviewedStingException("Found a leaf node with an empty state values vector");
|
||||||
stratifierValuesByKey.set(node.getKey(), new ArrayList<Object>(states));
|
stratifierValuesByKey.set(node.getKey(), Collections.unmodifiableList(new ArrayList<Object>(states)));
|
||||||
} else {
|
} else {
|
||||||
for ( Map.Entry<Object, StratNode<K>> entry : node.getSubnodes().entrySet() ) {
|
for ( Map.Entry<Object, StratNode<K>> entry : node.getSubnodes().entrySet() ) {
|
||||||
final LinkedList<Object> newStates = new LinkedList<Object>(states);
|
final LinkedList<Object> newStates = new LinkedList<Object>(states);
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||||
|
|
||||||
import org.broadinstitute.sting.WalkerTest;
|
import org.broadinstitute.sting.WalkerTest;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
@ -491,4 +492,18 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
);
|
);
|
||||||
executeTest("testModernVCFWithLargeIndels", spec);
|
executeTest("testModernVCFWithLargeIndels", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test()
|
||||||
|
public void testIncompatibleEvalAndStrat() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
buildCommandLine(
|
||||||
|
"-T VariantEval",
|
||||||
|
"-R " + b37KGReference,
|
||||||
|
"-eval " + validationDataLocation + "/NA12878.HiSeq.WGS.b37_decoy.indel.recalibrated.vcf",
|
||||||
|
"-L 20 -noST -ST AlleleCount -noEV -EV VariantSummary"
|
||||||
|
),
|
||||||
|
0,
|
||||||
|
UserException.class);
|
||||||
|
executeTest("testIncompatibleEvalAndStrat", spec);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue