Memory usage optimizations and safety improvements to StratNode and StratificationManager

-- Added memory and safety optimizations to StratNode and StratificationManager.  Fresh, immutable HashMaps are allocated for final data structures, so they are exactly the correct size and cannot be changed by users.
-- Added the ability for a stratification to specify incompatible evaluations.  The two strats using this are AC and Sample with VariantSummary, as this computes per-sample averages and so combining these results in an O(n^2) memory requirement.  Added an integration test to cover incompatible strats and evals.
This commit is contained in:
Mark DePristo 2012-03-30 08:32:48 -04:00
parent b335c22f6d
commit 097ed4ecc4
7 changed files with 70 additions and 15 deletions

View File

@ -269,7 +269,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
// Initialize the set of stratifications and evaluations to use
// The list of stratifiers and evaluators to use
final List<VariantStratifier> stratificationObjects = variantEvalUtils.initializeStratificationObjects(NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
final Set<Class<? extends VariantEvaluator>> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
final Set<Class<? extends VariantEvaluator>> evaluationClasses = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
checkForIncompatibleEvaluatorsAndStratifiers(stratificationObjects, evaluationClasses);
for ( VariantStratifier vs : stratificationObjects ) {
if ( vs.getName().equals("Filter") )
@ -289,10 +291,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
}
// Initialize the evaluation contexts
createStratificationStates(stratificationObjects, evaluationObjects);
createStratificationStates(stratificationObjects, evaluationClasses);
// Initialize report table
report = variantEvalUtils.initializeGATKReport(stratificationObjects, evaluationObjects);
report = variantEvalUtils.initializeGATKReport(stratificationObjects, evaluationClasses);
// Load ancestral alignments
if (ancestralAlignmentsFile != null) {
@ -309,6 +311,19 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
}
}
/**
 * Rejects any combination of a selected stratification and a selected evaluator
 * that the stratification has declared incompatible.
 *
 * Each stratifier is asked for its incompatible evaluator classes through
 * {@link VariantStratifier#getIncompatibleEvaluators()}, and the first selected
 * evaluator class found in that set aborts the run.
 *
 * @param stratificationObjects the stratifiers selected for this run
 * @param evaluationClasses     the evaluator classes selected for this run
 * @throws UserException.BadArgumentValue if a stratifier lists one of the
 *         evaluation classes as incompatible
 */
final void checkForIncompatibleEvaluatorsAndStratifiers( final List<VariantStratifier> stratificationObjects,
                                                         final Set<Class<? extends VariantEvaluator>> evaluationClasses) {
    for ( final VariantStratifier vs : stratificationObjects ) {
        // Query once per stratifier: the result does not depend on the evaluator being
        // tested, and implementations may build a new set on every call.
        final Set<Class<? extends VariantEvaluator>> incompatible = vs.getIncompatibleEvaluators();
        for ( final Class<? extends VariantEvaluator> ec : evaluationClasses ) {
            if ( incompatible.contains(ec) )
                throw new UserException.BadArgumentValue("ST and ET",
                        "The selected stratification " + vs.getName() +
                                " and evaluator " + ec.getSimpleName() +
                                " are incompatible due to combinatorial memory requirements." +
                                " Please disable one");
        }
    }
}
final void createStratificationStates(final List<VariantStratifier> stratificationObjects, final Set<Class<? extends VariantEvaluator>> evaluationObjects) {
final List<VariantStratifier> strats = new ArrayList<VariantStratifier>(stratificationObjects);
stratManager = new StratificationManager<VariantStratifier, EvaluationContext>(strats);

View File

@ -3,13 +3,13 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantSummary;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.*;
/**
* Stratifies the eval RODs by the allele count of the alternate allele
@ -50,9 +50,14 @@ public class AlleleCount extends VariantStratifier {
} else
// by default, the site is considered monomorphic
AC = 0;
return Collections.singletonList((Object)AC);
return Collections.singletonList((Object) AC);
} else {
return Collections.emptyList();
}
}
/**
 * Declares VariantSummary as incompatible with this stratification.
 *
 * The returned set is immutable, so no new mutable collection is built on each call.
 *
 * @return an immutable single-element set containing {@code VariantSummary.class}
 */
@Override
public Set<Class<? extends VariantEvaluator>> getIncompatibleEvaluators() {
    // explicit type witness: the element type must be the wildcard Class type, not Class<VariantSummary>
    return Collections.<Class<? extends VariantEvaluator>>singleton(VariantSummary.class);
}
}

View File

@ -2,11 +2,11 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantSummary;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.*;
/**
* Stratifies the eval RODs by each sample in the eval ROD.
@ -22,6 +22,11 @@ public class Sample extends VariantStratifier {
}
public List<Object> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
return Collections.singletonList((Object)sampleName);
return Collections.singletonList((Object) sampleName);
}
/**
 * Declares VariantSummary as incompatible with this stratification.
 *
 * The returned set is immutable, so no new mutable collection is built on each call.
 *
 * @return an immutable single-element set containing {@code VariantSummary.class}
 */
@Override
public Set<Class<? extends VariantEvaluator>> getIncompatibleEvaluators() {
    // explicit type witness: the element type must be the wildcard Class type, not Class<VariantSummary>
    return Collections.<Class<? extends VariantEvaluator>>singleton(VariantSummary.class);
}
}

View File

@ -3,11 +3,14 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manager.Stratifier;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
public abstract class VariantStratifier implements Comparable<VariantStratifier>, Stratifier {
private VariantEvalWalker variantEvalWalker;
@ -65,4 +68,16 @@ public abstract class VariantStratifier implements Comparable<VariantStratifier>
/**
 * Returns this stratifier's {@code states} list.
 *
 * NOTE(review): the {@code states} reference itself is returned rather than a copy, so
 * callers can presumably modify it -- confirm whether that is intended.
 *
 * @return the {@code states} list
 */
public final ArrayList<Object> getAllStates() {
return states;
}
/**
 * Hook that lets a stratifier name the evaluations it must not be combined with.
 * The default implementation declares no incompatibilities; subclasses override
 * this to return the evaluator classes they exclude.
 *
 * For example, VariantSummary includes a per-sample metric, and so cannot be used
 * safely with the Sample or AlleleCount stratifications, as this introduces an
 * O(n^2) memory and cpu cost.
 *
 * @return the set of VariantEvaluators that cannot be active with this Stratification
 */
public Set<Class<? extends VariantEvaluator>> getIncompatibleEvaluators() {
    final Set<Class<? extends VariantEvaluator>> noIncompatibilities = Collections.emptySet();
    return noIncompatibilities;
}
}

View File

@ -62,8 +62,7 @@ import java.util.*;
class StratNode<T extends Stratifier> implements Iterable<StratNode<T>> {
int key = -1;
final T stratifier;
// TODO -- track state key that maps to root node
final Map<Object, StratNode<T>> subnodes;
final Map<Object, StratNode<T>> subnodes; // NOTE, because we don't iterate our best option is a HashMap
protected StratNode() {
this.subnodes = Collections.emptyMap();
@ -72,7 +71,8 @@ class StratNode<T extends Stratifier> implements Iterable<StratNode<T>> {
protected StratNode(final T stratifier, final Map<Object, StratNode<T>> subnodes) {
this.stratifier = stratifier;
this.subnodes = subnodes;
// important to reallocate an unmodifiable hashmap with this specific size for space and safety
this.subnodes = Collections.unmodifiableMap(new HashMap<Object, StratNode<T>>(subnodes));
}
@Requires("key >= 0")

View File

@ -118,7 +118,7 @@ public class StratificationManager<K extends Stratifier, V> implements Map<List<
if ( node.isLeaf() ) { // we're here!
if ( states.isEmpty() )
throw new ReviewedStingException("Found a leaf node with an empty state values vector");
stratifierValuesByKey.set(node.getKey(), new ArrayList<Object>(states));
stratifierValuesByKey.set(node.getKey(), Collections.unmodifiableList(new ArrayList<Object>(states)));
} else {
for ( Map.Entry<Object, StratNode<K>> entry : node.getSubnodes().entrySet() ) {
final LinkedList<Object> newStates = new LinkedList<Object>(states);

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.varianteval;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.Test;
import java.util.Arrays;
@ -491,4 +492,18 @@ public class VariantEvalIntegrationTest extends WalkerTest {
);
executeTest("testModernVCFWithLargeIndels", spec);
}
/**
 * Runs VariantEval with the AlleleCount stratification and the VariantSummary
 * evaluator selected together, and expects the run to fail with a UserException.
 */
@Test()
public void testIncompatibleEvalAndStrat() {
    final String walkerArg = "-T VariantEval";
    final String referenceArg = "-R " + b37KGReference;
    final String evalArg = "-eval " + validationDataLocation + "/NA12878.HiSeq.WGS.b37_decoy.indel.recalibrated.vcf";
    // only the AlleleCount stratifier and the VariantSummary evaluator are enabled
    final String selectionArg = "-L 20 -noST -ST AlleleCount -noEV -EV VariantSummary";

    final WalkerTestSpec incompatibleSpec = new WalkerTestSpec(
            buildCommandLine(walkerArg, referenceArg, evalArg, selectionArg),
            0,
            UserException.class);
    executeTest("testIncompatibleEvalAndStrat", incompatibleSpec);
}
}