Parallelized SelectVariants

* SelectVariants can now be run with -nt (multiple threads), giving a significant speedup on large files
* added parallelization integration tests for SelectVariants
This commit is contained in:
Mauricio Carneiro 2011-12-12 18:41:04 -05:00
parent 663184ee9d
commit 5cc1e72fdb
2 changed files with 29 additions and 1 deletions

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@ -179,7 +180,7 @@ import java.util.*;
* </pre>
*
*/
public class SelectVariants extends RodWalker<Integer, Integer> {
public class SelectVariants extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
/**
@ -609,6 +610,11 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
/**
 * Combines a newly produced value with the accumulated sum by integer addition.
 *
 * @param value the value to fold in
 * @param sum   the total accumulated so far
 * @return {@code value + sum}
 */
@Override
public Integer reduce(Integer value, Integer sum) {
    final int total = value + sum;
    return total;
}
/**
 * Merges two partial results by adding them together.
 * Required by the {@code TreeReducible} interface implemented by this walker.
 *
 * @param lhs the first partial result
 * @param rhs the second partial result
 * @return {@code lhs + rhs}
 */
@Override
public Integer treeReduce(Integer lhs, Integer rhs) {
    final int combined = lhs + rhs;
    return combined;
}
public void onTraversalDone(Integer result) {
logger.info(result + " records processed.");

View File

@ -115,4 +115,26 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
executeTest("testUsingDbsnpName--" + testFile, spec);
}
/**
 * Runs the same SelectVariants command line with {@code -nt 2} and then
 * {@code -nt 4}, expecting the same output checksum from both runs
 * (presumably an MD5 of the produced file; confirm against WalkerTestSpec).
 */
@Test
public void testParallelization() {
    final String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
    final String samplesFile = validationDataLocation + "SelectVariants.samples.txt";

    // Everything except the thread count is shared between the runs.
    final String commonArgs = " -sn A -se '[CDH]' -sf " + samplesFile
            + " -env -ef -select 'DP < 250' --variant " + testfile;
    final String expectedChecksum = "d18516c1963802e92cb9e425c0b75fd6";

    for (final int threads : new int[]{2, 4}) {
        final WalkerTestSpec spec = new WalkerTestSpec(
                baseTestString(commonArgs + " -nt " + threads),
                1,
                Arrays.asList(expectedChecksum)
        );
        executeTest("testParallelization (" + threads + " threads)--" + testfile, spec);
    }
}
}