CombineVariants parallel integration tests
-- All tests but one (which uses old, malformed VCF3 input) run unmodified with the parallel code. -- Disabled LENIENT_VCF_PROCESSING by default (the -U flag is now passed only by the tests that still need it), which changes md5s because the output files have fixed headers. -- Minor optimizations to simpleMerge.
This commit is contained in:
parent
669c43031a
commit
c0a31b2e5b
|
|
@ -505,7 +505,7 @@ public class VariantContextUtils {
|
|||
|
||||
final Set<Allele> alleles = new LinkedHashSet<Allele>();
|
||||
final Set<String> filters = new HashSet<String>();
|
||||
final Map<String, Object> attributes = new TreeMap<String, Object>();
|
||||
final Map<String, Object> attributes = new LinkedHashMap<String, Object>();
|
||||
final Set<String> inconsistentAttributes = new HashSet<String>();
|
||||
final Set<String> variantSources = new HashSet<String>(); // contains the set of sources we found in our set of VCs that are variant
|
||||
final Set<String> rsIDs = new LinkedHashSet<String>(1); // most of the time there's one id
|
||||
|
|
@ -513,7 +513,7 @@ public class VariantContextUtils {
|
|||
GenomeLoc loc = getLocation(genomeLocParser,first);
|
||||
int depth = 0;
|
||||
int maxAC = -1;
|
||||
final Map<String, Object> attributesWithMaxAC = new TreeMap<String, Object>();
|
||||
final Map<String, Object> attributesWithMaxAC = new LinkedHashMap<String, Object>();
|
||||
double log10PError = 1;
|
||||
VariantContext vcWithMaxAC = null;
|
||||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
|
|
@ -657,7 +657,7 @@ public class VariantContextUtils {
|
|||
builder.genotypes(genotypes);
|
||||
builder.log10PError(log10PError);
|
||||
builder.filters(filters.isEmpty() ? filters : new TreeSet<String>(filters));
|
||||
builder.attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes);
|
||||
builder.attributes(new TreeMap<String, Object>(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes));
|
||||
|
||||
// Trim the padded bases of all alleles if necessary
|
||||
final VariantContext merged = builder.make();
|
||||
|
|
|
|||
|
|
@ -45,12 +45,16 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
// TODO TODO TODO TODO TODO TODO TODO TODO
|
||||
//
|
||||
private static String baseTestString(String args) {
|
||||
return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -U LENIENT_VCF_PROCESSING -R " + b36KGReference + args;
|
||||
return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -R " + b36KGReference + args;
|
||||
//return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -U LENIENT_VCF_PROCESSING -R " + b36KGReference + args;
|
||||
}
|
||||
|
||||
private void cvExecuteTest(final String name, final WalkerTestSpec spec) {
|
||||
private void cvExecuteTest(final String name, final WalkerTestSpec spec, final boolean parallel) {
|
||||
spec.disableShadowBCF();
|
||||
executeTestParallel(name, spec);
|
||||
if ( parallel )
|
||||
executeTestParallel(name, spec);
|
||||
else
|
||||
executeTest(name, spec);
|
||||
}
|
||||
|
||||
public void test1InOut(String file, String md5) {
|
||||
|
|
@ -62,15 +66,19 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
baseTestString(" -priority v1 -V:v1 " + validationDataLocation + file + args),
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
cvExecuteTest("testInOut1--" + file, spec);
|
||||
cvExecuteTest("testInOut1--" + file, spec, true);
|
||||
}
|
||||
|
||||
public void combine2(String file1, String file2, String args, String md5) {
|
||||
combine2(file1, file2, args, md5, true);
|
||||
}
|
||||
|
||||
public void combine2(String file1, String file2, String args, String md5, final boolean parallel) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -priority v1,v2 -V:v1 " + validationDataLocation + file1 + " -V:v2 "+ validationDataLocation + file2 + args),
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
cvExecuteTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
|
||||
cvExecuteTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec, parallel);
|
||||
}
|
||||
|
||||
public void combineSites(String args, String md5) {
|
||||
|
|
@ -82,7 +90,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
+ " -V:hm3 " + validationDataLocation + file2 + args,
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
cvExecuteTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
|
||||
cvExecuteTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec, true);
|
||||
}
|
||||
|
||||
public void combinePLs(String file1, String file2, String md5) {
|
||||
|
|
@ -90,26 +98,29 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
"-T CombineVariants --no_cmdline_in_header -o %s -R " + b36KGReference + " -priority v1,v2 -V:v1 " + privateTestDir + file1 + " -V:v2 " + privateTestDir + file2,
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
cvExecuteTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
|
||||
cvExecuteTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec, true);
|
||||
}
|
||||
|
||||
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "6469fce8a5cd5a0f77e5ac5d9e9e192b"); }
|
||||
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "a4cedaa83d54e34cafc3ac4b80acf5b4", " -setKey foo"); }
|
||||
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null"); }
|
||||
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "6469fce8a5cd5a0f77e5ac5d9e9e192b", " -U LENIENT_VCF_PROCESSING"); }
|
||||
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "a4cedaa83d54e34cafc3ac4b80acf5b4", " -setKey foo -U LENIENT_VCF_PROCESSING"); }
|
||||
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null -U LENIENT_VCF_PROCESSING"); }
|
||||
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "67a8076e30b4bca0ea5acdc9cd26a4e0"); } // official project VCF files in tabix format
|
||||
|
||||
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "909c6dc74eeb5ab86f8e74073eb0c1d6"); }
|
||||
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f0c2cb3e3a6160e1ed0ee2fd9b120f55"); }
|
||||
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "381875b3280ba56eef0152e56f64f68d"); }
|
||||
|
||||
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "f0ce3fb83d4ad9ba402d7cb11cd000c3"); }
|
||||
|
||||
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format
|
||||
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format
|
||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "4159a0c0d7c15852a3a545e0bea6bbc5"); }
|
||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "5d04f22ef88ed9226cbd7b4483c5cb23"); }
|
||||
|
||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "61d0ded244895234ac727391f29f13a8"); }
|
||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e54d0dcf14f90d5c8e58b45191dd0219"); }
|
||||
|
||||
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "0b1815c699e71e143ed129bfadaffbcb"); }
|
||||
@Test public void uniqueSNPs() {
|
||||
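// Parallelism must be disabled because the input VCF is malformed (DB=0), and running in parallel actually fixes this, which breaks the md5s. NOTE(review): the call below passes parallel=true, which contradicts this comment -- confirm which is intended.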
|
||||
combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "acc70f33be741b564f7be9aa3f819dd4", true);
|
||||
}
|
||||
|
||||
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "def52bcd3942bbe39cd7ebe845c4f206"); }
|
||||
@Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "5f61145949180bf2a0cd342d8e064860"); }
|
||||
|
|
@ -122,11 +133,12 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
" -V:denovoInfo "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" +
|
||||
" -setKey centerSet" +
|
||||
" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" +
|
||||
" -U LENIENT_VCF_PROCESSING" +
|
||||
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
||||
" -genotypeMergeOptions UNIQUIFY -L 1"),
|
||||
1,
|
||||
Arrays.asList("3039cfff7abee6aa7fbbafec66a1b019"));
|
||||
cvExecuteTest("threeWayWithRefs", spec);
|
||||
cvExecuteTest("threeWayWithRefs", spec, true);
|
||||
}
|
||||
|
||||
// complex examples with filtering, indels, and multiple alleles
|
||||
|
|
@ -139,7 +151,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
+ " -V:two " + privateTestDir + file2 + args,
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
cvExecuteTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
|
||||
cvExecuteTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec, true);
|
||||
}
|
||||
|
||||
@Test public void complexTestFull() { combineComplexSites("", "9d989053826ffe5bef7c4e05ac51bcca"); }
|
||||
|
|
@ -153,6 +165,6 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
"-T CombineVariants --no_cmdline_in_header -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132,
|
||||
1,
|
||||
Arrays.asList("aa926eae333208dc1f41fe69dc95d7a6"));
|
||||
cvExecuteTest("combineDBSNPDuplicateSites:", spec);
|
||||
cvExecuteTest("combineDBSNPDuplicateSites:", spec, true);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue