CombineVariants parallel integration tests

-- All tests but one (using old bad VCF3 input) run unmodified with parallel code.
-- Disabled LENIENT_VCF_PROCESSING in the shared base command line; only the tests that still need it now pass -U LENIENT_VCF_PROCESSING explicitly, which changes md5s because the output files have fixed headers
-- Minor optimizations to simpleMerge
This commit is contained in:
Mark DePristo 2012-08-15 21:12:21 -04:00
parent 669c43031a
commit c0a31b2e5b
2 changed files with 32 additions and 20 deletions

View File

@ -505,7 +505,7 @@ public class VariantContextUtils {
final Set<Allele> alleles = new LinkedHashSet<Allele>();
final Set<String> filters = new HashSet<String>();
final Map<String, Object> attributes = new TreeMap<String, Object>();
final Map<String, Object> attributes = new LinkedHashMap<String, Object>();
final Set<String> inconsistentAttributes = new HashSet<String>();
final Set<String> variantSources = new HashSet<String>(); // contains the set of sources we found in our set of VCs that are variant
final Set<String> rsIDs = new LinkedHashSet<String>(1); // most of the time there's one id
@ -513,7 +513,7 @@ public class VariantContextUtils {
GenomeLoc loc = getLocation(genomeLocParser,first);
int depth = 0;
int maxAC = -1;
final Map<String, Object> attributesWithMaxAC = new TreeMap<String, Object>();
final Map<String, Object> attributesWithMaxAC = new LinkedHashMap<String, Object>();
double log10PError = 1;
VariantContext vcWithMaxAC = null;
GenotypesContext genotypes = GenotypesContext.create();
@ -657,7 +657,7 @@ public class VariantContextUtils {
builder.genotypes(genotypes);
builder.log10PError(log10PError);
builder.filters(filters.isEmpty() ? filters : new TreeSet<String>(filters));
builder.attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes);
builder.attributes(new TreeMap<String, Object>(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes));
// Trim the padded bases of all alleles if necessary
final VariantContext merged = builder.make();

View File

@ -45,12 +45,16 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
// TODO TODO TODO TODO TODO TODO TODO TODO
//
private static String baseTestString(String args) {
return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -U LENIENT_VCF_PROCESSING -R " + b36KGReference + args;
return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -R " + b36KGReference + args;
//return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -U LENIENT_VCF_PROCESSING -R " + b36KGReference + args;
}
private void cvExecuteTest(final String name, final WalkerTestSpec spec) {
private void cvExecuteTest(final String name, final WalkerTestSpec spec, final boolean parallel) {
spec.disableShadowBCF();
executeTestParallel(name, spec);
if ( parallel )
executeTestParallel(name, spec);
else
executeTest(name, spec);
}
public void test1InOut(String file, String md5) {
@ -62,15 +66,19 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
baseTestString(" -priority v1 -V:v1 " + validationDataLocation + file + args),
1,
Arrays.asList(md5));
cvExecuteTest("testInOut1--" + file, spec);
cvExecuteTest("testInOut1--" + file, spec, true);
}
public void combine2(String file1, String file2, String args, String md5) {
// Convenience overload: merge the two inputs with parallel execution enabled.
final boolean runInParallel = true;
combine2(file1, file2, args, md5, runInParallel);
}
public void combine2(String file1, String file2, String args, String md5, final boolean parallel) {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -priority v1,v2 -V:v1 " + validationDataLocation + file1 + " -V:v2 "+ validationDataLocation + file2 + args),
1,
Arrays.asList(md5));
cvExecuteTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
cvExecuteTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec, parallel);
}
public void combineSites(String args, String md5) {
@ -82,7 +90,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
+ " -V:hm3 " + validationDataLocation + file2 + args,
1,
Arrays.asList(md5));
cvExecuteTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
cvExecuteTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec, true);
}
public void combinePLs(String file1, String file2, String md5) {
@ -90,26 +98,29 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
"-T CombineVariants --no_cmdline_in_header -o %s -R " + b36KGReference + " -priority v1,v2 -V:v1 " + privateTestDir + file1 + " -V:v2 " + privateTestDir + file2,
1,
Arrays.asList(md5));
cvExecuteTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
cvExecuteTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec, true);
}
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "6469fce8a5cd5a0f77e5ac5d9e9e192b"); }
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "a4cedaa83d54e34cafc3ac4b80acf5b4", " -setKey foo"); }
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null"); }
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "6469fce8a5cd5a0f77e5ac5d9e9e192b", " -U LENIENT_VCF_PROCESSING"); }
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "a4cedaa83d54e34cafc3ac4b80acf5b4", " -setKey foo -U LENIENT_VCF_PROCESSING"); }
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null -U LENIENT_VCF_PROCESSING"); }
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "67a8076e30b4bca0ea5acdc9cd26a4e0"); } // official project VCF files in tabix format
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "909c6dc74eeb5ab86f8e74073eb0c1d6"); }
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f0c2cb3e3a6160e1ed0ee2fd9b120f55"); }
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "381875b3280ba56eef0152e56f64f68d"); }
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "f0ce3fb83d4ad9ba402d7cb11cd000c3"); }
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "4159a0c0d7c15852a3a545e0bea6bbc5"); }
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "5d04f22ef88ed9226cbd7b4483c5cb23"); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "61d0ded244895234ac727391f29f13a8"); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e54d0dcf14f90d5c8e58b45191dd0219"); }
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "0b1815c699e71e143ed129bfadaffbcb"); }
@Test public void uniqueSNPs() {
// Parallelism must be disabled for this test: the input VCF is malformed (DB=0) and the
// parallel code path actually fixes this, which breaks the md5s.
// Passing parallel=false makes cvExecuteTest call executeTest instead of executeTestParallel.
combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "acc70f33be741b564f7be9aa3f819dd4", false);
}
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "def52bcd3942bbe39cd7ebe845c4f206"); }
@Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "5f61145949180bf2a0cd342d8e064860"); }
@ -122,11 +133,12 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
" -V:denovoInfo "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" +
" -setKey centerSet" +
" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" +
" -U LENIENT_VCF_PROCESSING" +
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
" -genotypeMergeOptions UNIQUIFY -L 1"),
1,
Arrays.asList("3039cfff7abee6aa7fbbafec66a1b019"));
cvExecuteTest("threeWayWithRefs", spec);
cvExecuteTest("threeWayWithRefs", spec, true);
}
// complex examples with filtering, indels, and multiple alleles
@ -139,7 +151,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
+ " -V:two " + privateTestDir + file2 + args,
1,
Arrays.asList(md5));
cvExecuteTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
cvExecuteTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec, true);
}
@Test public void complexTestFull() { combineComplexSites("", "9d989053826ffe5bef7c4e05ac51bcca"); }
@ -153,6 +165,6 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
"-T CombineVariants --no_cmdline_in_header -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132,
1,
Arrays.asList("aa926eae333208dc1f41fe69dc95d7a6"));
cvExecuteTest("combineDBSNPDuplicateSites:", spec);
cvExecuteTest("combineDBSNPDuplicateSites:", spec, true);
}
}