diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index 85d752003..2e233c18e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -26,14 +26,31 @@ public class Genotype { protected boolean filtersWereAppliedToContext; public Genotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean isPhased) { + this(sampleName, alleles, negLog10PError, filters, attributes, isPhased, null); + } + + public Genotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean isPhased, double[] log10Likelihoods) { if ( alleles != null ) this.alleles = Collections.unmodifiableList(alleles); commonInfo = new InferredGeneticContext(sampleName, negLog10PError, filters, attributes); + if ( log10Likelihoods != null ) + commonInfo.putAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods)); filtersWereAppliedToContext = filters != null; this.isPhased = isPhased; validate(); } + /** + * Creates a new Genotype for sampleName with genotype according to alleles. + * @param sampleName + * @param alleles + * @param negLog10PError the confidence in these alleles + * @param log10Likelihoods a log10 likelihoods for each of the genotype combinations possible for alleles, in the standard VCF ordering, or null if not known + */ + public Genotype(String sampleName, List alleles, double negLog10PError, double[] log10Likelihoods) { + this(sampleName, alleles, negLog10PError, null, null, false, log10Likelihoods); + } + public Genotype(String sampleName, List alleles, double negLog10PError) { this(sampleName, alleles, negLog10PError, null, null, false); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index ac5a87a1f..0205fe0eb 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -89,8 +89,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "0f873fed02aa99db5b140bcd6282c10a"); } @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f"); } // official project VCF files in tabix format - @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9"); } // official project VCF files in tabix format - @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "312a22aedb088b678bc891f1a1b03c91"); } + @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "96941ee177b0614a9879af0ac3218963"); } // official project VCF files in tabix format + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1c8720fde62687c2e861217670d8b3c"); } @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083"); } @@ -110,7 +110,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + " -genotypeMergeOptions UNIQUIFY -L 1"), 1, - Arrays.asList("35acb0f15f9cd18c653ede4e15e365c9")); + Arrays.asList("212d9d3df10bb29e2c7fb226da422dc0")); executeTest("threeWayWithRefs", spec); } @@ -137,7 +137,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T CombineVariants -NO_HEADER -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132, 1, - Arrays.asList("")); + Arrays.asList("5969446769cb8377daa2db29304ae6b5")); executeTest("combineDBSNPDuplicateSites:", spec); } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java index a8e6593b1..35c6a4993 100644 --- a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java @@ -105,7 +105,6 @@ public class VCFWriterUnitTest extends BaseTest { public static VCFHeader createFakeHeader(Set metaData, Set additionalColumns) { metaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_0.getFormatString(), VCFHeaderVersion.VCF4_0.getVersionString())); metaData.add(new VCFHeaderLine("two", "2")); - additionalColumns.add("FORMAT"); additionalColumns.add("extra1"); additionalColumns.add("extra2"); return new VCFHeader(metaData, additionalColumns); @@ -159,6 +158,6 @@ public class VCFWriterUnitTest extends BaseTest { Assert.assertTrue(additionalColumns.contains(key)); index++; } - Assert.assertEquals(index+1, additionalColumns.size() /* for the header field we don't see */); + Assert.assertEquals(index, additionalColumns.size()); } } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java index f8e6da20a..663eb9ef6 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java @@ -5,6 +5,7 @@ package org.broadinstitute.sting.utils.variantcontext; // the imports for unit testing. +import org.broadinstitute.sting.BaseTest; import org.testng.Assert; import org.testng.annotations.BeforeSuite; import org.testng.annotations.BeforeTest; @@ -14,10 +15,7 @@ import java.util.Arrays; import java.util.List; -/** - * Basic unit test for RecalData - */ -public class VariantContextUnitTest { +public class VariantContextUnitTest extends BaseTest { Allele A, Aref, T, Tref; Allele del, delRef, ATC, ATCref; diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java new file mode 100644 index 000000000..81007f9ff --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.utils.variantcontext; + + +// the imports for unit testing. + + +import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.apache.log4j.Priority; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.testng.Assert; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.Test; +import org.testng.annotations.DataProvider; + +import java.io.File; +import java.io.FileNotFoundException; +import java.util.*; + + +public class VariantContextUtilsUnitTest extends BaseTest { + Allele Aref, T, delRef, ATC; + Genotype ref1, snp1, snp2, indel1, indelref; + private GenomeLocParser genomeLocParser; + VariantContext refVC, snpVC1, snpVC2, snpVC3, snpVC4, indelVC1, indelVC2, indelVC3; + + @BeforeSuite + public void setup() { + final File referenceFile = new File(b37KGReference); + try { + IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile); + genomeLocParser = new GenomeLocParser(seq); + } + catch(FileNotFoundException ex) { + throw new UserException.CouldNotReadInputFile(referenceFile,ex); + } + + // alleles + Aref = Allele.create("A", true); + delRef = Allele.create("-", true); + T = Allele.create("T"); + ATC = Allele.create("ATC"); + + ref1 = new Genotype("ref1", Arrays.asList(Aref, Aref), 5, new double[]{0, 5, 10}); + snp1 = new Genotype("snp1", Arrays.asList(Aref,T), 10, new double[]{10, 0, 20}); + snp2 = new Genotype("snp2", Arrays.asList(T,T), 15, new double[]{25, 15, 0}); + indelref = new Genotype("indelref", Arrays.asList(delRef,delRef), 25, new double[]{0, 25, 30}); + indel1 = new Genotype("indel1", Arrays.asList(delRef,ATC), 20, new double[]{20, 0, 30}); + + refVC = makeVC("refvc", Arrays.asList(Aref), Arrays.asList(ref1)); + snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T), Arrays.asList(snp1)); + snpVC2 = makeVC("snpvc2", Arrays.asList(Aref, T), Arrays.asList(snp1, snp2)); + snpVC3 = makeVC("snpvc3", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1)); + snpVC4 = makeVC("snpvc4", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1, snp2)); + indelVC1 = makeVC("indelvc1", Arrays.asList(delRef), Arrays.asList(indelref)); + indelVC2 = makeVC("indelvc2", Arrays.asList(delRef, ATC), Arrays.asList(indel1)); + indelVC3 = makeVC("indelvc3", Arrays.asList(delRef, ATC), Arrays.asList(indelref, indel1)); + } + + private VariantContext makeVC(String source, List alleles) { + return makeVC(source, alleles, null, null); + } + + private VariantContext makeVC(String source, List alleles, Collection genotypes) { + return makeVC(source, alleles, genotypes, null); + } + + private VariantContext makeVC(String source, List alleles, Collection genotypes, Set filters) { + int start = 10; + int stop = start; // alleles.contains(ATC) ? start + 3 : start; + return new VariantContext(source, "1", start, stop, alleles, + VariantContext.genotypeCollectionToMap(new TreeMap(), genotypes), + 1.0, filters, null, (byte)'C'); + } + + private class SimpleMergeTest extends TestDataProvider { + List inputVCs; + VariantContext expectedVC; + + private SimpleMergeTest(VariantContext... vcsArg) { + super(SimpleMergeTest.class); + LinkedList allVCs = new LinkedList(Arrays.asList(vcsArg)); + expectedVC = allVCs.pollLast(); + inputVCs = allVCs; + } + + public String toString() { + return String.format("SimpleMergeTest vc=%s expected=%s", inputVCs, expectedVC); + } + } + + @DataProvider(name = "simplemergedata") + public Object[][] createSimpleMergeData() { + // first, do no harm + new SimpleMergeTest(refVC, refVC); + new SimpleMergeTest(snpVC1, snpVC1); + new SimpleMergeTest(indelVC1, indelVC1); + new SimpleMergeTest(indelVC3, indelVC3); + + new SimpleMergeTest(refVC, snpVC1, snpVC3); + new SimpleMergeTest(snpVC1, snpVC2, snpVC2); + new SimpleMergeTest(refVC, snpVC2, snpVC4); + + new SimpleMergeTest(indelVC1, indelVC2, indelVC3); + new SimpleMergeTest(indelVC1, indelVC3, indelVC3); + new SimpleMergeTest(indelVC2, indelVC3, indelVC3); + + return SimpleMergeTest.getTests(SimpleMergeTest.class); + } + + private class SimpleMergeRSIDTest extends TestDataProvider { + List inputs; + String expected; + + private SimpleMergeRSIDTest(String... arg) { + super(SimpleMergeRSIDTest.class); + LinkedList allStrings = new LinkedList(Arrays.asList(arg)); + expected = allStrings.pollLast(); + inputs = allStrings; + } + + public String toString() { + return String.format("SimpleMergeRSIDTest vc=%s expected=%s", inputs, expected); + } + } + + @DataProvider(name = "simplemergersiddata") + public Object[][] createSimpleMergeRSIDData() { + new SimpleMergeRSIDTest(".", "."); + new SimpleMergeRSIDTest("rs1", "rs1"); + new SimpleMergeRSIDTest(".", "rs1", "rs1"); + new SimpleMergeRSIDTest("rs1", ".", "rs1"); + new SimpleMergeRSIDTest("rs1", "rs2", "rs1,rs2"); + new SimpleMergeRSIDTest("rs2", "rs1", "rs2,rs1"); + new SimpleMergeRSIDTest("rs2", "rs1", ".", "rs2,rs1"); + new SimpleMergeRSIDTest("rs2", ".", "rs1", "rs2,rs1"); + new SimpleMergeRSIDTest("rs1", ".", ".", "rs1"); + new SimpleMergeRSIDTest("rs1", "rs2", "rs3", "rs1,rs2,rs3"); + + return SimpleMergeRSIDTest.getTests(SimpleMergeRSIDTest.class); + } + + @Test(dataProvider = "simplemergersiddata") + public void testRSIDMerge(SimpleMergeRSIDTest cfg) { + List inputs = new ArrayList(); + for ( String id : cfg.inputs ) { + MutableVariantContext vc = new MutableVariantContext(snpVC1); + if ( ! id.equals(".") ) vc.setID(id); + inputs.add(vc); + + } + + VariantContext merged = myMerge(inputs); + Assert.assertEquals(merged.getID(), cfg.expected.equals(".") ? null : cfg.expected); + } + + private VariantContext myMerge(List inputs) { + List priority = new ArrayList(); + for ( VariantContext vc : inputs ) priority.add(vc.getSource()); + + return VariantContextUtils.simpleMerge(genomeLocParser, + inputs, priority, + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); + } + + // todo -- add tests for subset merging, especially with correct PLs + // todo -- test priority list + // todo -- test FilteredRecordMergeType + // todo -- no annotate origin + // todo -- test set key + // todo -- test filtered are uncalled +} diff --git a/public/packages/AnalyzeCovariates.xml b/public/packages/AnalyzeCovariates.xml index a6675a63d..e8d58862a 100644 --- a/public/packages/AnalyzeCovariates.xml +++ b/public/packages/AnalyzeCovariates.xml @@ -6,7 +6,7 @@ - +