diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index 577168578..2e233c18e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -25,16 +25,16 @@ public class Genotype { protected boolean isPhased = false; protected boolean filtersWereAppliedToContext; - public Genotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean isPhased) { + public Genotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean isPhased) { this(sampleName, alleles, negLog10PError, filters, attributes, isPhased, null); } - public Genotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean isPhased, double[] log10Likelihoods) { + public Genotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean isPhased, double[] log10Likelihoods) { if ( alleles != null ) this.alleles = Collections.unmodifiableList(alleles); - if ( log10Likelihoods != null ) - attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods)); commonInfo = new InferredGeneticContext(sampleName, negLog10PError, filters, attributes); + if ( log10Likelihoods != null ) + commonInfo.putAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods)); filtersWereAppliedToContext = filters != null; this.isPhased = isPhased; validate(); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 9ca29d41c..81007f9ff 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -30,6 +30,7 @@ package org.broadinstitute.sting.utils.variantcontext; import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.apache.log4j.Priority; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -39,6 +40,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.testng.Assert; import org.testng.annotations.BeforeSuite; import org.testng.annotations.Test; +import org.testng.annotations.DataProvider; import java.io.File; import java.io.FileNotFoundException; @@ -47,8 +49,9 @@ import java.util.*; public class VariantContextUtilsUnitTest extends BaseTest { Allele Aref, T, delRef, ATC; - Genotype snp1, snp2, indel1; + Genotype ref1, snp1, snp2, indel1, indelref; private GenomeLocParser genomeLocParser; + VariantContext refVC, snpVC1, snpVC2, snpVC3, snpVC4, indelVC1, indelVC2, indelVC3; @BeforeSuite public void setup() { @@ -67,18 +70,133 @@ public class VariantContextUtilsUnitTest extends BaseTest { T = Allele.create("T"); ATC = Allele.create("ATC"); + ref1 = new Genotype("ref1", Arrays.asList(Aref, Aref), 5, new double[]{0, 5, 10}); snp1 = new Genotype("snp1", Arrays.asList(Aref,T), 10, new double[]{10, 0, 20}); snp2 = new Genotype("snp2", Arrays.asList(T,T), 15, new double[]{25, 15, 0}); + indelref = new Genotype("indelref", Arrays.asList(delRef,delRef), 25, new double[]{0, 25, 30}); indel1 = new Genotype("indel1", Arrays.asList(delRef,ATC), 20, new double[]{20, 0, 30}); + + refVC = makeVC("refvc", Arrays.asList(Aref), Arrays.asList(ref1)); + snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T), Arrays.asList(snp1)); + snpVC2 = makeVC("snpvc2", Arrays.asList(Aref, T), Arrays.asList(snp1, snp2)); + snpVC3 = makeVC("snpvc3", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1)); + snpVC4 = makeVC("snpvc4", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1, snp2)); + indelVC1 = makeVC("indelvc1", Arrays.asList(delRef), Arrays.asList(indelref)); + indelVC2 = makeVC("indelvc2", Arrays.asList(delRef, ATC), Arrays.asList(indel1)); + indelVC3 = makeVC("indelvc3", Arrays.asList(delRef, ATC), Arrays.asList(indelref, indel1)); } private VariantContext makeVC(String source, List alleles) { return makeVC(source, alleles, null, null); } + private VariantContext makeVC(String source, List alleles, Collection genotypes) { + return makeVC(source, alleles, genotypes, null); + } + private VariantContext makeVC(String source, List alleles, Collection genotypes, Set filters) { int start = 10; - int stop = alleles.contains(ATC) ? start + 3 : start; - return new VariantContext(source, "1", start, stop, alleles, genotypes, 1.0, filters, null); + int stop = start; // alleles.contains(ATC) ? start + 3 : start; + return new VariantContext(source, "1", start, stop, alleles, + VariantContext.genotypeCollectionToMap(new TreeMap(), genotypes), + 1.0, filters, null, (byte)'C'); } + + private class SimpleMergeTest extends TestDataProvider { + List inputVCs; + VariantContext expectedVC; + + private SimpleMergeTest(VariantContext... vcsArg) { + super(SimpleMergeTest.class); + LinkedList allVCs = new LinkedList(Arrays.asList(vcsArg)); + expectedVC = allVCs.pollLast(); + inputVCs = allVCs; + } + + public String toString() { + return String.format("SimpleMergeTest vc=%s expected=%s", inputVCs, expectedVC); + } + } + + @DataProvider(name = "simplemergedata") + public Object[][] createSimpleMergeData() { + // first, do no harm + new SimpleMergeTest(refVC, refVC); + new SimpleMergeTest(snpVC1, snpVC1); + new SimpleMergeTest(indelVC1, indelVC1); + new SimpleMergeTest(indelVC3, indelVC3); + + new SimpleMergeTest(refVC, snpVC1, snpVC3); + new SimpleMergeTest(snpVC1, snpVC2, snpVC2); + new SimpleMergeTest(refVC, snpVC2, snpVC4); + + new SimpleMergeTest(indelVC1, indelVC2, indelVC3); + new SimpleMergeTest(indelVC1, indelVC3, indelVC3); + new SimpleMergeTest(indelVC2, indelVC3, indelVC3); + + return SimpleMergeTest.getTests(SimpleMergeTest.class); + } + + private class SimpleMergeRSIDTest extends TestDataProvider { + List inputs; + String expected; + + private SimpleMergeRSIDTest(String... arg) { + super(SimpleMergeRSIDTest.class); + LinkedList allStrings = new LinkedList(Arrays.asList(arg)); + expected = allStrings.pollLast(); + inputs = allStrings; + } + + public String toString() { + return String.format("SimpleMergeRSIDTest vc=%s expected=%s", inputs, expected); + } + } + + @DataProvider(name = "simplemergersiddata") + public Object[][] createSimpleMergeRSIDData() { + new SimpleMergeRSIDTest(".", "."); + new SimpleMergeRSIDTest("rs1", "rs1"); + new SimpleMergeRSIDTest(".", "rs1", "rs1"); + new SimpleMergeRSIDTest("rs1", ".", "rs1"); + new SimpleMergeRSIDTest("rs1", "rs2", "rs1,rs2"); + new SimpleMergeRSIDTest("rs2", "rs1", "rs2,rs1"); + new SimpleMergeRSIDTest("rs2", "rs1", ".", "rs2,rs1"); + new SimpleMergeRSIDTest("rs2", ".", "rs1", "rs2,rs1"); + new SimpleMergeRSIDTest("rs1", ".", ".", "rs1"); + new SimpleMergeRSIDTest("rs1", "rs2", "rs3", "rs1,rs2,rs3"); + + return SimpleMergeRSIDTest.getTests(SimpleMergeRSIDTest.class); + } + + @Test(dataProvider = "simplemergersiddata") + public void testRSIDMerge(SimpleMergeRSIDTest cfg) { + List inputs = new ArrayList(); + for ( String id : cfg.inputs ) { + MutableVariantContext vc = new MutableVariantContext(snpVC1); + if ( ! id.equals(".") ) vc.setID(id); + inputs.add(vc); + + } + + VariantContext merged = myMerge(inputs); + Assert.assertEquals(merged.getID(), cfg.expected.equals(".") ? null : cfg.expected); + } + + private VariantContext myMerge(List inputs) { + List priority = new ArrayList(); + for ( VariantContext vc : inputs ) priority.add(vc.getSource()); + + return VariantContextUtils.simpleMerge(genomeLocParser, + inputs, priority, + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); + } + + // todo -- add tests for subset merging, especially with correct PLs + // todo -- test priority list + // todo -- test FilteredRecordMergeType + // todo -- no annotate origin + // todo -- test set key + // todo -- test filtered are uncalled }