diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 81007f9ff..01d398c1a 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -48,10 +48,8 @@ import java.util.*; public class VariantContextUtilsUnitTest extends BaseTest { - Allele Aref, T, delRef, ATC; - Genotype ref1, snp1, snp2, indel1, indelref; + Allele Aref, T, C, delRef, ATC, ATCATC; private GenomeLocParser genomeLocParser; - VariantContext refVC, snpVC1, snpVC2, snpVC3, snpVC4, indelVC1, indelVC2, indelVC3; @BeforeSuite public void setup() { @@ -68,22 +66,9 @@ public class VariantContextUtilsUnitTest extends BaseTest { Aref = Allele.create("A", true); delRef = Allele.create("-", true); T = Allele.create("T"); + C = Allele.create("C"); ATC = Allele.create("ATC"); - - ref1 = new Genotype("ref1", Arrays.asList(Aref, Aref), 5, new double[]{0, 5, 10}); - snp1 = new Genotype("snp1", Arrays.asList(Aref,T), 10, new double[]{10, 0, 20}); - snp2 = new Genotype("snp2", Arrays.asList(T,T), 15, new double[]{25, 15, 0}); - indelref = new Genotype("indelref", Arrays.asList(delRef,delRef), 25, new double[]{0, 25, 30}); - indel1 = new Genotype("indel1", Arrays.asList(delRef,ATC), 20, new double[]{20, 0, 30}); - - refVC = makeVC("refvc", Arrays.asList(Aref), Arrays.asList(ref1)); - snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T), Arrays.asList(snp1)); - snpVC2 = makeVC("snpvc2", Arrays.asList(Aref, T), Arrays.asList(snp1, snp2)); - snpVC3 = makeVC("snpvc3", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1)); - snpVC4 = makeVC("snpvc4", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1, snp2)); - indelVC1 = makeVC("indelvc1", Arrays.asList(delRef), Arrays.asList(indelref)); - indelVC2 = makeVC("indelvc2", Arrays.asList(delRef, ATC), Arrays.asList(indel1)); - indelVC3 = makeVC("indelvc3", Arrays.asList(delRef, ATC), Arrays.asList(indelref, indel1)); + ATCATC = Allele.create("ATCATC"); } private VariantContext makeVC(String source, List alleles) { @@ -98,45 +83,124 @@ public class VariantContextUtilsUnitTest extends BaseTest { int start = 10; int stop = start; // alleles.contains(ATC) ? start + 3 : start; return new VariantContext(source, "1", start, stop, alleles, - VariantContext.genotypeCollectionToMap(new TreeMap(), genotypes), + genotypes == null ? null : VariantContext.genotypeCollectionToMap(new TreeMap(), genotypes), 1.0, filters, null, (byte)'C'); } - private class SimpleMergeTest extends TestDataProvider { - List inputVCs; - VariantContext expectedVC; + // -------------------------------------------------------------------------------- + // + // Test allele merging + // + // -------------------------------------------------------------------------------- - private SimpleMergeTest(VariantContext... vcsArg) { - super(SimpleMergeTest.class); - LinkedList allVCs = new LinkedList(Arrays.asList(vcsArg)); - expectedVC = allVCs.pollLast(); - inputVCs = allVCs; + private class MergeAllelesTest extends TestDataProvider { + List> inputs; + List expected; + + private MergeAllelesTest(List... arg) { + super(MergeAllelesTest.class); + LinkedList> all = new LinkedList>(Arrays.asList(arg)); + expected = all.pollLast(); + inputs = all; } public String toString() { - return String.format("SimpleMergeTest vc=%s expected=%s", inputVCs, expectedVC); + return String.format("MergeAllelesTest input=%s expected=%s", inputs, expected); } } - - @DataProvider(name = "simplemergedata") - public Object[][] createSimpleMergeData() { + @DataProvider(name = "mergeAlleles") + public Object[][] mergeAllelesData() { // first, do no harm - new SimpleMergeTest(refVC, refVC); - new SimpleMergeTest(snpVC1, snpVC1); - new SimpleMergeTest(indelVC1, indelVC1); - new SimpleMergeTest(indelVC3, indelVC3); + new MergeAllelesTest(Arrays.asList(Aref), + Arrays.asList(Aref)); - new SimpleMergeTest(refVC, snpVC1, snpVC3); - new SimpleMergeTest(snpVC1, snpVC2, snpVC2); - new SimpleMergeTest(refVC, snpVC2, snpVC4); + new MergeAllelesTest(Arrays.asList(Aref), + Arrays.asList(Aref), + Arrays.asList(Aref)); - new SimpleMergeTest(indelVC1, indelVC2, indelVC3); - new SimpleMergeTest(indelVC1, indelVC3, indelVC3); - new SimpleMergeTest(indelVC2, indelVC3, indelVC3); + new MergeAllelesTest(Arrays.asList(Aref), + Arrays.asList(Aref, T), + Arrays.asList(Aref, T)); - return SimpleMergeTest.getTests(SimpleMergeTest.class); + new MergeAllelesTest(Arrays.asList(Aref, C), + Arrays.asList(Aref, T), + Arrays.asList(Aref, C, T)); + + new MergeAllelesTest(Arrays.asList(Aref, T), + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); // sorted by allele + + new MergeAllelesTest(Arrays.asList(Aref, C, T), + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); + + new MergeAllelesTest(Arrays.asList(Aref, T, C), + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); // sorted by allele + + new MergeAllelesTest(Arrays.asList(delRef), + Arrays.asList(delRef)); // todo -- FIXME me GdA + + new MergeAllelesTest(Arrays.asList(delRef), + Arrays.asList(delRef, ATC), + Arrays.asList(delRef, ATC)); + + new MergeAllelesTest(Arrays.asList(delRef), + Arrays.asList(delRef, ATC, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); + + new MergeAllelesTest(Arrays.asList(delRef, ATCATC), + Arrays.asList(delRef, ATC, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); + + new MergeAllelesTest(Arrays.asList(delRef, ATC), + Arrays.asList(delRef, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); + + return MergeAllelesTest.getTests(MergeAllelesTest.class); } + @Test(dataProvider = "mergeAlleles") + public void testMergeAlleles(MergeAllelesTest cfg) { + final List priority = new ArrayList(); + final List inputs = new ArrayList(); + + int i = 0; + for ( final List alleles : cfg.inputs ) { + final String name = "vcf" + ++i; + priority.add(name); + inputs.add(makeVC(name, alleles)); + } + + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + inputs, priority, + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false); + Assert.assertEquals(merged.getAlleles(), cfg.expected); + } + +// VariantContext refVC, snpVC1, snpVC2, snpVC3, snpVC4, indelVC1, indelVC2, indelVC3; +// ref1 = new Genotype("ref1", Arrays.asList(Aref, Aref), 5, new double[]{0, 5, 10}); +// snp1 = new Genotype("snp1", Arrays.asList(Aref,T), 10, new double[]{10, 0, 20}); +// snp2 = new Genotype("snp2", Arrays.asList(T,T), 15, new double[]{25, 15, 0}); +// indelref = new Genotype("indelref", Arrays.asList(delRef,delRef), 25, new double[]{0, 25, 30}); +// indel1 = new Genotype("indel1", Arrays.asList(delRef,ATC), 20, new double[]{20, 0, 30}); +// +// refVC = makeVC("refvc", Arrays.asList(Aref), Arrays.asList(ref1)); +// snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T), Arrays.asList(snp1)); +// snpVC2 = makeVC("snpvc2", Arrays.asList(Aref, T), Arrays.asList(snp1, snp2)); +// snpVC3 = makeVC("snpvc3", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1)); +// snpVC4 = makeVC("snpvc4", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1, snp2)); +// indelVC1 = makeVC("indelvc1", Arrays.asList(delRef), Arrays.asList(indelref)); +// indelVC2 = makeVC("indelvc2", Arrays.asList(delRef, ATC), Arrays.asList(indel1)); +// indelVC3 = makeVC("indelvc3", Arrays.asList(delRef, ATC), Arrays.asList(indelref, indel1)); + + // -------------------------------------------------------------------------------- + // + // Test rsID merging + // + // -------------------------------------------------------------------------------- + private class SimpleMergeRSIDTest extends TestDataProvider { List inputs; String expected; @@ -156,10 +220,13 @@ public class VariantContextUtilsUnitTest extends BaseTest { @DataProvider(name = "simplemergersiddata") public Object[][] createSimpleMergeRSIDData() { new SimpleMergeRSIDTest(".", "."); + new SimpleMergeRSIDTest(".", ".", "."); new SimpleMergeRSIDTest("rs1", "rs1"); + new SimpleMergeRSIDTest("rs1", "rs1", "rs1"); new SimpleMergeRSIDTest(".", "rs1", "rs1"); new SimpleMergeRSIDTest("rs1", ".", "rs1"); new SimpleMergeRSIDTest("rs1", "rs2", "rs1,rs2"); + new SimpleMergeRSIDTest("rs1", "rs2", "rs1", "rs1,rs2"); // duplicates new SimpleMergeRSIDTest("rs2", "rs1", "rs2,rs1"); new SimpleMergeRSIDTest("rs2", "rs1", ".", "rs2,rs1"); new SimpleMergeRSIDTest("rs2", ".", "rs1", "rs2,rs1"); @@ -171,32 +238,25 @@ public class VariantContextUtilsUnitTest extends BaseTest { @Test(dataProvider = "simplemergersiddata") public void testRSIDMerge(SimpleMergeRSIDTest cfg) { - List inputs = new ArrayList(); - for ( String id : cfg.inputs ) { + final VariantContext snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T)); + final List inputs = new ArrayList(); + + for ( final String id : cfg.inputs ) { MutableVariantContext vc = new MutableVariantContext(snpVC1); if ( ! id.equals(".") ) vc.setID(id); inputs.add(vc); - } - VariantContext merged = myMerge(inputs); + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + inputs, null, + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, "set", false, false); Assert.assertEquals(merged.getID(), cfg.expected.equals(".") ? null : cfg.expected); } - private VariantContext myMerge(List inputs) { - List priority = new ArrayList(); - for ( VariantContext vc : inputs ) priority.add(vc.getSource()); - - return VariantContextUtils.simpleMerge(genomeLocParser, - inputs, priority, - VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, - VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); - } - // todo -- add tests for subset merging, especially with correct PLs - // todo -- test priority list + // todo -- test priority list: intersection, filtered in all, reference in all, X-filteredInX, X // todo -- test FilteredRecordMergeType // todo -- no annotate origin // todo -- test set key - // todo -- test filtered are uncalled }