Genotype merging unit tests for simpleMerge

-- Remaining TODOs are all for GdA
This commit is contained in:
Mark DePristo 2011-09-23 08:24:49 -04:00
parent 4397ce8653
commit a9f073fa68
1 changed files with 234 additions and 85 deletions

View File

@ -21,14 +21,8 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
// our package
package org.broadinstitute.sting.utils.variantcontext;
// the imports for unit testing.
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.apache.log4j.Priority;
import org.broadinstitute.sting.BaseTest;
@ -47,7 +41,6 @@ import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
public class VariantContextUtilsUnitTest extends BaseTest {
Allele Aref, T, C, delRef, ATC, ATCATC;
private GenomeLocParser genomeLocParser;
@ -72,10 +65,22 @@ public class VariantContextUtilsUnitTest extends BaseTest {
ATCATC = Allele.create("ATCATC");
}
private Genotype makeG(String sample, Allele a1, Allele a2) {
return new Genotype(sample, Arrays.asList(a1, a2));
}
private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError) {
return new Genotype(sample, Arrays.asList(a1, a2), log10pError);
}
private VariantContext makeVC(String source, List<Allele> alleles) {
return makeVC(source, alleles, null, null);
}
private VariantContext makeVC(String source, List<Allele> alleles, Genotype... g1) {
return makeVC(source, alleles, Arrays.asList(g1));
}
private VariantContext makeVC(String source, List<Allele> alleles, String filter) {
return makeVC(source, alleles, filter.equals(".") ? null : new HashSet<String>(Arrays.asList(filter)));
}
@ -121,66 +126,66 @@ public class VariantContextUtilsUnitTest extends BaseTest {
public Object[][] mergeAllelesData() {
// first, do no harm
new MergeAllelesTest(Arrays.asList(Aref),
Arrays.asList(Aref));
Arrays.asList(Aref));
new MergeAllelesTest(Arrays.asList(Aref),
Arrays.asList(Aref),
Arrays.asList(Aref));
Arrays.asList(Aref),
Arrays.asList(Aref));
new MergeAllelesTest(Arrays.asList(Aref),
Arrays.asList(Aref, T),
Arrays.asList(Aref, T));
Arrays.asList(Aref, T),
Arrays.asList(Aref, T));
new MergeAllelesTest(Arrays.asList(Aref, C),
Arrays.asList(Aref, T),
Arrays.asList(Aref, C, T));
Arrays.asList(Aref, T),
Arrays.asList(Aref, C, T));
new MergeAllelesTest(Arrays.asList(Aref, T),
Arrays.asList(Aref, C),
Arrays.asList(Aref, C, T)); // sorted by allele
Arrays.asList(Aref, C),
Arrays.asList(Aref, C, T)); // sorted by allele
new MergeAllelesTest(Arrays.asList(Aref, C, T),
Arrays.asList(Aref, C),
Arrays.asList(Aref, C, T));
Arrays.asList(Aref, C),
Arrays.asList(Aref, C, T));
new MergeAllelesTest(Arrays.asList(Aref, T, C),
Arrays.asList(Aref, C),
Arrays.asList(Aref, C, T)); // sorted by allele
Arrays.asList(Aref, C),
Arrays.asList(Aref, C, T)); // sorted by allele
new MergeAllelesTest(Arrays.asList(delRef),
Arrays.asList(delRef)); // todo -- FIXME me GdA
Arrays.asList(delRef)); // todo -- FIXME me GdA
new MergeAllelesTest(Arrays.asList(delRef),
Arrays.asList(delRef, ATC),
Arrays.asList(delRef, ATC));
Arrays.asList(delRef, ATC),
Arrays.asList(delRef, ATC));
new MergeAllelesTest(Arrays.asList(delRef),
Arrays.asList(delRef, ATC, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
Arrays.asList(delRef, ATC, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
new MergeAllelesTest(Arrays.asList(delRef, ATCATC),
Arrays.asList(delRef, ATC, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
Arrays.asList(delRef, ATC, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
new MergeAllelesTest(Arrays.asList(delRef, ATC),
Arrays.asList(delRef, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
Arrays.asList(delRef, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
return MergeAllelesTest.getTests(MergeAllelesTest.class);
}
@Test(dataProvider = "mergeAlleles")
public void testMergeAlleles(MergeAllelesTest cfg) {
final List<String> priority = new ArrayList<String>();
final List<VariantContext> inputs = new ArrayList<VariantContext>();
int i = 0;
for ( final List<Allele> alleles : cfg.inputs ) {
final String name = "vcf" + ++i;
priority.add(name);
inputs.add(makeVC(name, alleles));
}
final List<String> priority = vcs2priority(inputs);
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
inputs, priority,
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
@ -281,87 +286,82 @@ public class VariantContextUtilsUnitTest extends BaseTest {
@DataProvider(name = "mergeFiltered")
public Object[][] mergeFilteredData() {
new MergeFilteredTest("AllPass",
makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
VariantContextUtils.MERGE_INTERSECTION);
makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
VariantContextUtils.MERGE_INTERSECTION);
new MergeFilteredTest("noFilters",
makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), "."),
makeVC("3", Arrays.asList(Aref, T), "."),
VariantContextUtils.MERGE_INTERSECTION);
makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), "."),
makeVC("3", Arrays.asList(Aref, T), "."),
VariantContextUtils.MERGE_INTERSECTION);
new MergeFilteredTest("oneFiltered",
makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), "."),
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), "."),
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
new MergeFilteredTest("onePassOneFail",
makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
new MergeFilteredTest("AllFiltered",
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), "FAIL"),
VariantContextUtils.MERGE_FILTER_IN_ALL);
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), "FAIL"),
VariantContextUtils.MERGE_FILTER_IN_ALL);
// test ALL vs. ANY
new MergeFilteredTest("FailOneUnfiltered",
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
makeVC("2", Arrays.asList(Aref, T), "."),
makeVC("3", Arrays.asList(Aref, T), "."),
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX));
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
makeVC("2", Arrays.asList(Aref, T), "."),
makeVC("3", Arrays.asList(Aref, T), "."),
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX));
new MergeFilteredTest("OneFailAllUnfilteredArg",
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
makeVC("2", Arrays.asList(Aref, T), "."),
makeVC("3", Arrays.asList(Aref, T), "FAIL"),
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED,
String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX));
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
makeVC("2", Arrays.asList(Aref, T), "."),
makeVC("3", Arrays.asList(Aref, T), "FAIL"),
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED,
String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX));
// test excluding allele in filtered record
new MergeFilteredTest("DontIncludeAlleleOfFilteredRecords",
makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), "."),
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), "."),
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
// promotion of site from unfiltered to PASSES
new MergeFilteredTest("UnfilteredPlusPassIsPass",
makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
VariantContextUtils.MERGE_INTERSECTION);
makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
VariantContextUtils.MERGE_INTERSECTION);
new MergeFilteredTest("RefInAll",
makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
VariantContextUtils.MERGE_REF_IN_ALL);
makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
VariantContextUtils.MERGE_REF_IN_ALL);
new MergeFilteredTest("RefInOne",
makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
"2");
makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
"2");
return MergeFilteredTest.getTests(MergeFilteredTest.class);
}
@Test(dataProvider = "mergeFiltered")
public void testMergeFiltered(MergeFilteredTest cfg) {
final List<String> priority = new ArrayList<String>();
for ( final VariantContext vc : cfg.inputs ) {
priority.add(vc.getSource());
}
final List<String> priority = vcs2priority(cfg.inputs);
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
cfg.inputs, priority, cfg.type, VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);
@ -375,6 +375,148 @@ public class VariantContextUtilsUnitTest extends BaseTest {
Assert.assertEquals(merged.getFilters(), cfg.expected.getFilters());
}
// --------------------------------------------------------------------------------
//
// Test genotype merging
//
// --------------------------------------------------------------------------------
private class MergeGenotypesTest extends TestDataProvider {
List<VariantContext> inputs;
VariantContext expected;
List<String> priority;
private MergeGenotypesTest(String name, String priority, VariantContext... arg) {
super(MergeGenotypesTest.class, name);
LinkedList<VariantContext> all = new LinkedList<VariantContext>(Arrays.asList(arg));
this.expected = all.pollLast();
inputs = all;
this.priority = Arrays.asList(priority.split(","));
}
public String toString() {
return String.format("%s input=%s expected=%s", super.toString(), inputs, expected);
}
}
@DataProvider(name = "mergeGenotypes")
public Object[][] mergeGenotypesData() {
new MergeGenotypesTest("TakeGenotypeByPriority-1,2", "1,2",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)));
new MergeGenotypesTest("TakeGenotypeByPriority-1,2-nocall", "1,2",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)));
new MergeGenotypesTest("TakeGenotypeByPriority-2,1", "2,1",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)));
new MergeGenotypesTest("NonOverlappingGenotypes", "1,2",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s2", Aref, T, 2)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1), makeG("s2", Aref, T, 2)));
new MergeGenotypesTest("PreserveNoCall", "1,2",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s2", Aref, T, 2)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1), makeG("s2", Aref, T, 2)));
new MergeGenotypesTest("PerserveAlleles", "1,2",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
makeVC("2", Arrays.asList(Aref, C), makeG("s2", Aref, C, 2)),
makeVC("3", Arrays.asList(Aref, C, T), makeG("s1", Aref, T, 1), makeG("s2", Aref, C, 2)));
new MergeGenotypesTest("TakeGenotypePartialOverlap-1,2", "1,2",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1), makeG("s3", Aref, T, 3)));
new MergeGenotypesTest("TakeGenotypePartialOverlap-2,1", "2,1",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)));
// todo -- GDA -- add tests for merging correct PLs
return MergeGenotypesTest.getTests(MergeGenotypesTest.class);
}
// todo -- add test for GenotypeMergeType UNIQUIFY, REQUIRE_UNIQUE
@Test(dataProvider = "mergeGenotypes")
public void testMergeGenotypes(MergeGenotypesTest cfg) {
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
cfg.inputs, cfg.priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);
// test alleles are equal
Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles());
// test genotypes
assertGenotypesAreMostlyEqual(merged.getGenotypes(), cfg.expected.getGenotypes());
}
// necessary to not overload equals for genotypes
private void assertGenotypesAreMostlyEqual(Map<String, Genotype> actual, Map<String, Genotype> expected) {
if (actual == expected) {
return;
}
if (actual == null || expected == null) {
Assert.fail("Maps not equal: expected: " + expected + " and actual: " + actual);
}
if (actual.size() != expected.size()) {
Assert.fail("Maps do not have the same size:" + actual.size() + " != " + expected.size());
}
for (Map.Entry<String, Genotype> entry : actual.entrySet()) {
String key = entry.getKey();
Genotype value = entry.getValue();
Genotype expectedValue = expected.get(key);
Assert.assertEquals(value.alleles, expectedValue.alleles);
Assert.assertEquals(value.getNegLog10PError(), expectedValue.getNegLog10PError());
Assert.assertEquals(value.hasLikelihoods(), expectedValue.hasLikelihoods());
if ( value.hasLikelihoods() )
Assert.assertEquals(value.getLikelihoods(), expectedValue.getLikelihoods());
}
}
@Test
public void testMergeGenotypesUniquify() {
final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1));
final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2));
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false);
// test genotypes
Assert.assertEquals(merged.getGenotypes().keySet(), new HashSet<String>(Arrays.asList("s1.1", "s1.2")));
}
@Test(expectedExceptions = UserException.class)
public void testMergeGenotypesRequireUnique() {
final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1));
final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2));
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE, false, false, "set", false, false);
}
// --------------------------------------------------------------------------------
//
// Misc. tests
//
// --------------------------------------------------------------------------------
@Test
public void testAnnotationSet() {
for ( final boolean annotate : Arrays.asList(true, false)) {
@ -395,6 +537,13 @@ public class VariantContextUtilsUnitTest extends BaseTest {
}
}
// todo -- add tests for subset merging, especially with correct PLs
// todo -- test priority list
private static final List<String> vcs2priority(final Collection<VariantContext> vcs) {
final List<String> priority = new ArrayList<String>();
for ( final VariantContext vc : vcs ) {
priority.add(vc.getSource());
}
return priority;
}
}