diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index e571bb4c1..5421960b2 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -505,7 +505,7 @@ public class VariantContextUtils { final Set alleles = new LinkedHashSet(); final Set filters = new HashSet(); - final Map attributes = new TreeMap(); + final Map attributes = new LinkedHashMap(); final Set inconsistentAttributes = new HashSet(); final Set variantSources = new HashSet(); // contains the set of sources we found in our set of VCs that are variant final Set rsIDs = new LinkedHashSet(1); // most of the time there's one id @@ -513,7 +513,7 @@ public class VariantContextUtils { GenomeLoc loc = getLocation(genomeLocParser,first); int depth = 0; int maxAC = -1; - final Map attributesWithMaxAC = new TreeMap(); + final Map attributesWithMaxAC = new LinkedHashMap(); double log10PError = 1; VariantContext vcWithMaxAC = null; GenotypesContext genotypes = GenotypesContext.create(); @@ -657,7 +657,7 @@ public class VariantContextUtils { builder.genotypes(genotypes); builder.log10PError(log10PError); builder.filters(filters.isEmpty() ? filters : new TreeSet(filters)); - builder.attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes); + builder.attributes(new TreeMap(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes)); // Trim the padded bases of all alleles if necessary final VariantContext merged = builder.make(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 2f28a6dbc..9ea751b72 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -45,12 +45,16 @@ public class CombineVariantsIntegrationTest extends WalkerTest { // TODO TODO TODO TODO TODO TODO TODO TODO // private static String baseTestString(String args) { - return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -U LENIENT_VCF_PROCESSING -R " + b36KGReference + args; + return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -R " + b36KGReference + args; + //return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -U LENIENT_VCF_PROCESSING -R " + b36KGReference + args; } - private void cvExecuteTest(final String name, final WalkerTestSpec spec) { + private void cvExecuteTest(final String name, final WalkerTestSpec spec, final boolean parallel) { spec.disableShadowBCF(); - executeTestParallel(name, spec); + if ( parallel ) + executeTestParallel(name, spec); + else + executeTest(name, spec); } public void test1InOut(String file, String md5) { @@ -62,15 +66,19 @@ public class CombineVariantsIntegrationTest extends WalkerTest { baseTestString(" -priority v1 -V:v1 " + validationDataLocation + file + args), 1, Arrays.asList(md5)); - cvExecuteTest("testInOut1--" + file, spec); + cvExecuteTest("testInOut1--" + file, spec, true); } public void combine2(String file1, String file2, String args, String md5) { + combine2(file1, file2, args, md5, true); + } + + public void combine2(String file1, String file2, String args, String md5, final boolean parallel) { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(" -priority v1,v2 -V:v1 " + validationDataLocation + file1 + " -V:v2 "+ validationDataLocation + file2 + args), 1, Arrays.asList(md5)); - cvExecuteTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec); + cvExecuteTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec, parallel); } public void combineSites(String args, String md5) { @@ -82,7 +90,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { + " -V:hm3 " + validationDataLocation + file2 + args, 1, Arrays.asList(md5)); - cvExecuteTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); + cvExecuteTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec, true); } public void combinePLs(String file1, String file2, String md5) { @@ -90,26 +98,29 @@ public class CombineVariantsIntegrationTest extends WalkerTest { "-T CombineVariants --no_cmdline_in_header -o %s -R " + b36KGReference + " -priority v1,v2 -V:v1 " + privateTestDir + file1 + " -V:v2 " + privateTestDir + file2, 1, Arrays.asList(md5)); - cvExecuteTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec); + cvExecuteTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec, true); } - @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "6469fce8a5cd5a0f77e5ac5d9e9e192b"); } - @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "a4cedaa83d54e34cafc3ac4b80acf5b4", " -setKey foo"); } - @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null"); } + @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "6469fce8a5cd5a0f77e5ac5d9e9e192b", " -U LENIENT_VCF_PROCESSING"); } + @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "a4cedaa83d54e34cafc3ac4b80acf5b4", " -setKey foo -U LENIENT_VCF_PROCESSING"); } + @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ac58a5fde17661e2a19004ca954d9781", " -setKey null -U LENIENT_VCF_PROCESSING"); } @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "67a8076e30b4bca0ea5acdc9cd26a4e0"); } // official project VCF files in tabix format @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "909c6dc74eeb5ab86f8e74073eb0c1d6"); } - @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "f0c2cb3e3a6160e1ed0ee2fd9b120f55"); } + @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "381875b3280ba56eef0152e56f64f68d"); } @Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "f0ce3fb83d4ad9ba402d7cb11cd000c3"); } @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format - @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "4159a0c0d7c15852a3a545e0bea6bbc5"); } + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "5d04f22ef88ed9226cbd7b4483c5cb23"); } - @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "61d0ded244895234ac727391f29f13a8"); } + @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e54d0dcf14f90d5c8e58b45191dd0219"); } - @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "0b1815c699e71e143ed129bfadaffbcb"); } + @Test public void uniqueSNPs() { + // parallelism must be disabled because the input VCF is malformed (DB=0) and parallelism actually fixes this which breaks the md5s + combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "acc70f33be741b564f7be9aa3f819dd4", true); + } @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "def52bcd3942bbe39cd7ebe845c4f206"); } @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "5f61145949180bf2a0cd342d8e064860"); } @@ -122,11 +133,12 @@ public class CombineVariantsIntegrationTest extends WalkerTest { " -V:denovoInfo "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" + " -setKey centerSet" + " -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" + + " -U LENIENT_VCF_PROCESSING" + " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + " -genotypeMergeOptions UNIQUIFY -L 1"), 1, Arrays.asList("3039cfff7abee6aa7fbbafec66a1b019")); - cvExecuteTest("threeWayWithRefs", spec); + cvExecuteTest("threeWayWithRefs", spec, true); } // complex examples with filtering, indels, and multiple alleles @@ -139,7 +151,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { + " -V:two " + privateTestDir + file2 + args, 1, Arrays.asList(md5)); - cvExecuteTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); + cvExecuteTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec, true); } @Test public void complexTestFull() { combineComplexSites("", "9d989053826ffe5bef7c4e05ac51bcca"); } @@ -153,6 +165,6 @@ public class CombineVariantsIntegrationTest extends WalkerTest { "-T CombineVariants --no_cmdline_in_header -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132, 1, Arrays.asList("aa926eae333208dc1f41fe69dc95d7a6")); - cvExecuteTest("combineDBSNPDuplicateSites:", spec); + cvExecuteTest("combineDBSNPDuplicateSites:", spec, true); } } \ No newline at end of file