From 68da555932761567f79f0b3bfdbc808dcf72ca5e Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Sep 2011 15:16:37 -0400 Subject: [PATCH 01/18] UnitTest for simpleMerge for alleles --- .../VariantContextUtilsUnitTest.java | 176 ++++++++++++------ 1 file changed, 118 insertions(+), 58 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 81007f9ff..01d398c1a 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -48,10 +48,8 @@ import java.util.*; public class VariantContextUtilsUnitTest extends BaseTest { - Allele Aref, T, delRef, ATC; - Genotype ref1, snp1, snp2, indel1, indelref; + Allele Aref, T, C, delRef, ATC, ATCATC; private GenomeLocParser genomeLocParser; - VariantContext refVC, snpVC1, snpVC2, snpVC3, snpVC4, indelVC1, indelVC2, indelVC3; @BeforeSuite public void setup() { @@ -68,22 +66,9 @@ public class VariantContextUtilsUnitTest extends BaseTest { Aref = Allele.create("A", true); delRef = Allele.create("-", true); T = Allele.create("T"); + C = Allele.create("C"); ATC = Allele.create("ATC"); - - ref1 = new Genotype("ref1", Arrays.asList(Aref, Aref), 5, new double[]{0, 5, 10}); - snp1 = new Genotype("snp1", Arrays.asList(Aref,T), 10, new double[]{10, 0, 20}); - snp2 = new Genotype("snp2", Arrays.asList(T,T), 15, new double[]{25, 15, 0}); - indelref = new Genotype("indelref", Arrays.asList(delRef,delRef), 25, new double[]{0, 25, 30}); - indel1 = new Genotype("indel1", Arrays.asList(delRef,ATC), 20, new double[]{20, 0, 30}); - - refVC = makeVC("refvc", Arrays.asList(Aref), Arrays.asList(ref1)); - snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T), Arrays.asList(snp1)); - snpVC2 = makeVC("snpvc2", Arrays.asList(Aref, T), Arrays.asList(snp1, snp2)); - snpVC3 = makeVC("snpvc3", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1)); - snpVC4 = makeVC("snpvc4", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1, snp2)); - indelVC1 = makeVC("indelvc1", Arrays.asList(delRef), Arrays.asList(indelref)); - indelVC2 = makeVC("indelvc2", Arrays.asList(delRef, ATC), Arrays.asList(indel1)); - indelVC3 = makeVC("indelvc3", Arrays.asList(delRef, ATC), Arrays.asList(indelref, indel1)); + ATCATC = Allele.create("ATCATC"); } private VariantContext makeVC(String source, List alleles) { @@ -98,45 +83,124 @@ public class VariantContextUtilsUnitTest extends BaseTest { int start = 10; int stop = start; // alleles.contains(ATC) ? start + 3 : start; return new VariantContext(source, "1", start, stop, alleles, - VariantContext.genotypeCollectionToMap(new TreeMap(), genotypes), + genotypes == null ? null : VariantContext.genotypeCollectionToMap(new TreeMap(), genotypes), 1.0, filters, null, (byte)'C'); } - private class SimpleMergeTest extends TestDataProvider { - List inputVCs; - VariantContext expectedVC; + // -------------------------------------------------------------------------------- + // + // Test allele merging + // + // -------------------------------------------------------------------------------- - private SimpleMergeTest(VariantContext... vcsArg) { - super(SimpleMergeTest.class); - LinkedList allVCs = new LinkedList(Arrays.asList(vcsArg)); - expectedVC = allVCs.pollLast(); - inputVCs = allVCs; + private class MergeAllelesTest extends TestDataProvider { + List> inputs; + List expected; + + private MergeAllelesTest(List... arg) { + super(MergeAllelesTest.class); + LinkedList> all = new LinkedList>(Arrays.asList(arg)); + expected = all.pollLast(); + inputs = all; } public String toString() { - return String.format("SimpleMergeTest vc=%s expected=%s", inputVCs, expectedVC); + return String.format("MergeAllelesTest input=%s expected=%s", inputs, expected); } } - - @DataProvider(name = "simplemergedata") - public Object[][] createSimpleMergeData() { + @DataProvider(name = "mergeAlleles") + public Object[][] mergeAllelesData() { // first, do no harm - new SimpleMergeTest(refVC, refVC); - new SimpleMergeTest(snpVC1, snpVC1); - new SimpleMergeTest(indelVC1, indelVC1); - new SimpleMergeTest(indelVC3, indelVC3); + new MergeAllelesTest(Arrays.asList(Aref), + Arrays.asList(Aref)); - new SimpleMergeTest(refVC, snpVC1, snpVC3); - new SimpleMergeTest(snpVC1, snpVC2, snpVC2); - new SimpleMergeTest(refVC, snpVC2, snpVC4); + new MergeAllelesTest(Arrays.asList(Aref), + Arrays.asList(Aref), + Arrays.asList(Aref)); - new SimpleMergeTest(indelVC1, indelVC2, indelVC3); - new SimpleMergeTest(indelVC1, indelVC3, indelVC3); - new SimpleMergeTest(indelVC2, indelVC3, indelVC3); + new MergeAllelesTest(Arrays.asList(Aref), + Arrays.asList(Aref, T), + Arrays.asList(Aref, T)); - return SimpleMergeTest.getTests(SimpleMergeTest.class); + new MergeAllelesTest(Arrays.asList(Aref, C), + Arrays.asList(Aref, T), + Arrays.asList(Aref, C, T)); + + new MergeAllelesTest(Arrays.asList(Aref, T), + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); // sorted by allele + + new MergeAllelesTest(Arrays.asList(Aref, C, T), + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); + + new MergeAllelesTest(Arrays.asList(Aref, T, C), + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); // sorted by allele + + new MergeAllelesTest(Arrays.asList(delRef), + Arrays.asList(delRef)); // todo -- FIXME me GdA + + new MergeAllelesTest(Arrays.asList(delRef), + Arrays.asList(delRef, ATC), + Arrays.asList(delRef, ATC)); + + new MergeAllelesTest(Arrays.asList(delRef), + Arrays.asList(delRef, ATC, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); + + new MergeAllelesTest(Arrays.asList(delRef, ATCATC), + Arrays.asList(delRef, ATC, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); + + new MergeAllelesTest(Arrays.asList(delRef, ATC), + Arrays.asList(delRef, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); + + return MergeAllelesTest.getTests(MergeAllelesTest.class); } + @Test(dataProvider = "mergeAlleles") + public void testMergeAlleles(MergeAllelesTest cfg) { + final List priority = new ArrayList(); + final List inputs = new ArrayList(); + + int i = 0; + for ( final List alleles : cfg.inputs ) { + final String name = "vcf" + ++i; + priority.add(name); + inputs.add(makeVC(name, alleles)); + } + + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + inputs, priority, + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false); + Assert.assertEquals(merged.getAlleles(), cfg.expected); + } + +// VariantContext refVC, snpVC1, snpVC2, snpVC3, snpVC4, indelVC1, indelVC2, indelVC3; +// ref1 = new Genotype("ref1", Arrays.asList(Aref, Aref), 5, new double[]{0, 5, 10}); +// snp1 = new Genotype("snp1", Arrays.asList(Aref,T), 10, new double[]{10, 0, 20}); +// snp2 = new Genotype("snp2", Arrays.asList(T,T), 15, new double[]{25, 15, 0}); +// indelref = new Genotype("indelref", Arrays.asList(delRef,delRef), 25, new double[]{0, 25, 30}); +// indel1 = new Genotype("indel1", Arrays.asList(delRef,ATC), 20, new double[]{20, 0, 30}); +// +// refVC = makeVC("refvc", Arrays.asList(Aref), Arrays.asList(ref1)); +// snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T), Arrays.asList(snp1)); +// snpVC2 = makeVC("snpvc2", Arrays.asList(Aref, T), Arrays.asList(snp1, snp2)); +// snpVC3 = makeVC("snpvc3", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1)); +// snpVC4 = makeVC("snpvc4", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1, snp2)); +// indelVC1 = makeVC("indelvc1", Arrays.asList(delRef), Arrays.asList(indelref)); +// indelVC2 = makeVC("indelvc2", Arrays.asList(delRef, ATC), Arrays.asList(indel1)); +// indelVC3 = makeVC("indelvc3", Arrays.asList(delRef, ATC), Arrays.asList(indelref, indel1)); + + // -------------------------------------------------------------------------------- + // + // Test rsID merging + // + // -------------------------------------------------------------------------------- + private class SimpleMergeRSIDTest extends TestDataProvider { List inputs; String expected; @@ -156,10 +220,13 @@ public class VariantContextUtilsUnitTest extends BaseTest { @DataProvider(name = "simplemergersiddata") public Object[][] createSimpleMergeRSIDData() { new SimpleMergeRSIDTest(".", "."); + new SimpleMergeRSIDTest(".", ".", "."); new SimpleMergeRSIDTest("rs1", "rs1"); + new SimpleMergeRSIDTest("rs1", "rs1", "rs1"); new SimpleMergeRSIDTest(".", "rs1", "rs1"); new SimpleMergeRSIDTest("rs1", ".", "rs1"); new SimpleMergeRSIDTest("rs1", "rs2", "rs1,rs2"); + new SimpleMergeRSIDTest("rs1", "rs2", "rs1", "rs1,rs2"); // duplicates new SimpleMergeRSIDTest("rs2", "rs1", "rs2,rs1"); new SimpleMergeRSIDTest("rs2", "rs1", ".", "rs2,rs1"); new SimpleMergeRSIDTest("rs2", ".", "rs1", "rs2,rs1"); @@ -171,32 +238,25 @@ public class VariantContextUtilsUnitTest extends BaseTest { @Test(dataProvider = "simplemergersiddata") public void testRSIDMerge(SimpleMergeRSIDTest cfg) { - List inputs = new ArrayList(); - for ( String id : cfg.inputs ) { + final VariantContext snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T)); + final List inputs = new ArrayList(); + + for ( final String id : cfg.inputs ) { MutableVariantContext vc = new MutableVariantContext(snpVC1); if ( ! id.equals(".") ) vc.setID(id); inputs.add(vc); - } - VariantContext merged = myMerge(inputs); + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + inputs, null, + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, "set", false, false); Assert.assertEquals(merged.getID(), cfg.expected.equals(".") ? null : cfg.expected); } - private VariantContext myMerge(List inputs) { - List priority = new ArrayList(); - for ( VariantContext vc : inputs ) priority.add(vc.getSource()); - - return VariantContextUtils.simpleMerge(genomeLocParser, - inputs, priority, - VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, - VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); - } - // todo -- add tests for subset merging, especially with correct PLs - // todo -- test priority list + // todo -- test priority list: intersection, filtered in all, reference in all, X-filteredInX, X // todo -- test FilteredRecordMergeType // todo -- no annotate origin // todo -- test set key - // todo -- test filtered are uncalled } From 46ca33dc047d6a52406142d9dc323185938d7dab Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Sep 2011 17:04:32 -0400 Subject: [PATCH 02/18] TestDataProvider now can be named --- .../test/org/broadinstitute/sting/BaseTest.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index 63faf1ab9..35a81770d 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -132,15 +132,21 @@ public abstract class BaseTest { */ public static class TestDataProvider { private static final Map> tests = new HashMap>(); + private final String name; /** * Create a new TestDataProvider instance bound to the class variable C * @param c */ - public TestDataProvider(Class c) { + public TestDataProvider(Class c, String name) { if ( ! tests.containsKey(c) ) tests.put(c, new ArrayList()); tests.get(c).add(this); + this.name = name; + } + + public TestDataProvider(Class c) { + this(c, ""); } /** @@ -153,6 +159,11 @@ public abstract class BaseTest { for ( Object x : tests.get(c) ) params2.add(new Object[]{x}); return params2.toArray(new Object[][]{}); } + + @Override + public String toString() { + return "TestDataProvider("+name+")"; + } } /** From 5cf82f923615b2dae0dee1b80f043fadacdfbc05 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Sep 2011 17:05:12 -0400 Subject: [PATCH 03/18] simpleMerge UnitTest tests filtered VC merging --- .../variantcontext/VariantContextUtils.java | 14 +- .../VariantContextUtilsUnitTest.java | 141 ++++++++++++++++-- 2 files changed, 135 insertions(+), 20 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index ca9c71eba..03fe969b5 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -44,6 +44,11 @@ import java.io.Serializable; import java.util.*; public class VariantContextUtils { + public final static String MERGE_INTERSECTION = "Intersection"; + public final static String MERGE_FILTER_IN_ALL = "FilteredInAll"; + public final static String MERGE_REF_IN_ALL = "ReferenceInAll"; + public final static String MERGE_FILTER_PREFIX = "filterIn"; + final public static JexlEngine engine = new JexlEngine(); static { engine.setSilent(false); // will throw errors now for selects that don't evaluate properly @@ -609,19 +614,20 @@ public class VariantContextUtils { if ( filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() ) filters.clear(); + if ( annotateOrigin ) { // we care about where the call came from String setValue; if ( nFiltered == 0 && variantSources.size() == priorityListOfVCs.size() ) // nothing was unfiltered - setValue = "Intersection"; + setValue = MERGE_INTERSECTION; else if ( nFiltered == VCs.size() ) // everything was filtered out - setValue = "FilteredInAll"; + setValue = MERGE_FILTER_IN_ALL; else if ( variantSources.isEmpty() ) // everyone was reference - setValue = "ReferenceInAll"; + setValue = MERGE_REF_IN_ALL; else { LinkedHashSet s = new LinkedHashSet(); for ( VariantContext vc : VCs ) if ( vc.isVariant() ) - s.add( vc.isFiltered() ? "filterIn" + vc.getSource() : vc.getSource() ); + s.add( vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource() ); setValue = Utils.join("-", s); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 01d398c1a..ff26d7245 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -41,6 +41,7 @@ import org.testng.Assert; import org.testng.annotations.BeforeSuite; import org.testng.annotations.Test; import org.testng.annotations.DataProvider; +import org.yaml.snakeyaml.Yaml; import java.io.File; import java.io.FileNotFoundException; @@ -75,6 +76,14 @@ public class VariantContextUtilsUnitTest extends BaseTest { return makeVC(source, alleles, null, null); } + private VariantContext makeVC(String source, List alleles, String filter) { + return makeVC(source, alleles, filter.equals(".") ? null : new HashSet(Arrays.asList(filter))); + } + + private VariantContext makeVC(String source, List alleles, Set filters) { + return makeVC(source, alleles, null, filters); + } + private VariantContext makeVC(String source, List alleles, Collection genotypes) { return makeVC(source, alleles, genotypes, null); } @@ -179,22 +188,6 @@ public class VariantContextUtilsUnitTest extends BaseTest { Assert.assertEquals(merged.getAlleles(), cfg.expected); } -// VariantContext refVC, snpVC1, snpVC2, snpVC3, snpVC4, indelVC1, indelVC2, indelVC3; -// ref1 = new Genotype("ref1", Arrays.asList(Aref, Aref), 5, new double[]{0, 5, 10}); -// snp1 = new Genotype("snp1", Arrays.asList(Aref,T), 10, new double[]{10, 0, 20}); -// snp2 = new Genotype("snp2", Arrays.asList(T,T), 15, new double[]{25, 15, 0}); -// indelref = new Genotype("indelref", Arrays.asList(delRef,delRef), 25, new double[]{0, 25, 30}); -// indel1 = new Genotype("indel1", Arrays.asList(delRef,ATC), 20, new double[]{20, 0, 30}); -// -// refVC = makeVC("refvc", Arrays.asList(Aref), Arrays.asList(ref1)); -// snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T), Arrays.asList(snp1)); -// snpVC2 = makeVC("snpvc2", Arrays.asList(Aref, T), Arrays.asList(snp1, snp2)); -// snpVC3 = makeVC("snpvc3", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1)); -// snpVC4 = makeVC("snpvc4", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1, snp2)); -// indelVC1 = makeVC("indelvc1", Arrays.asList(delRef), Arrays.asList(indelref)); -// indelVC2 = makeVC("indelvc2", Arrays.asList(delRef, ATC), Arrays.asList(indel1)); -// indelVC3 = makeVC("indelvc3", Arrays.asList(delRef, ATC), Arrays.asList(indelref, indel1)); - // -------------------------------------------------------------------------------- // // Test rsID merging @@ -254,6 +247,122 @@ public class VariantContextUtilsUnitTest extends BaseTest { Assert.assertEquals(merged.getID(), cfg.expected.equals(".") ? null : cfg.expected); } + // -------------------------------------------------------------------------------- + // + // Test filtered merging + // + // -------------------------------------------------------------------------------- + + private class MergeFilteredTest extends TestDataProvider { + List inputs; + VariantContext expected; + String setExpected; + VariantContextUtils.FilteredRecordMergeType type; + + + private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, String setExpected) { + this(name, input1, input2, expected, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, setExpected); + } + + private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, VariantContextUtils.FilteredRecordMergeType type, String setExpected) { + super(MergeFilteredTest.class, name); + LinkedList all = new LinkedList(Arrays.asList(input1, input2)); + this.expected = expected; + this.type = type; + inputs = all; + this.setExpected = setExpected; + } + + public String toString() { + return String.format("%s input=%s expected=%s", super.toString(), inputs, expected); + } + } + + @DataProvider(name = "mergeFiltered") + public Object[][] mergeFilteredData() { + new MergeFilteredTest("AllPass", + makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + VariantContextUtils.MERGE_INTERSECTION); + + new MergeFilteredTest("noFilters", + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "."), + VariantContextUtils.MERGE_INTERSECTION); + + new MergeFilteredTest("oneFiltered", + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), "."), + String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + + new MergeFilteredTest("onePassOneFail", + makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + + new MergeFilteredTest("FailOneUnfiltered", + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "."), + String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); + + new MergeFilteredTest("AllFiltered", + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), "FAIL"), + VariantContextUtils.MERGE_FILTER_IN_ALL); + + // test ALL vs. ANY + new MergeFilteredTest("OneFailAllUnfilteredArg", + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "FAIL"), + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED, + String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); + + // test excluding allele in filtered record + new MergeFilteredTest("DontIncludeAlleleOfFilteredRecords", + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), "."), + String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + + // promotion of site from unfiltered to PASSES + new MergeFilteredTest("UnfilteredPlusPassIsPass", + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + VariantContextUtils.MERGE_INTERSECTION); + + return MergeFilteredTest.getTests(MergeFilteredTest.class); + } + + @Test(dataProvider = "mergeFiltered") + public void testMergeFiltered(MergeFilteredTest cfg) { + final List priority = new ArrayList(); + + for ( final VariantContext vc : cfg.inputs ) { + priority.add(vc.getSource()); + } + + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + cfg.inputs, priority, cfg.type, VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); + + // test alleles are equal + Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles()); + + // test set field + Assert.assertEquals(merged.getAttribute("set"), cfg.setExpected); + + // test filter field + Assert.assertEquals(merged.getFilters(), cfg.expected.getFilters()); + } + + // todo -- add tests for subset merging, especially with correct PLs // todo -- test priority list: intersection, filtered in all, reference in all, X-filteredInX, X // todo -- test FilteredRecordMergeType From 30ab3af0c88d4d22a6d894c8d5e45863b4957445 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Sep 2011 17:14:59 -0400 Subject: [PATCH 04/18] A few more simpleMerge UnitTest tests for filtered vcs --- .../VariantContextUtilsUnitTest.java | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index ff26d7245..070fdaca4 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -304,12 +304,6 @@ public class VariantContextUtilsUnitTest extends BaseTest { makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); - new MergeFilteredTest("FailOneUnfiltered", - makeVC("1", Arrays.asList(Aref, T), "FAIL"), - makeVC("2", Arrays.asList(Aref, T), "."), - makeVC("3", Arrays.asList(Aref, T), "."), - String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); - new MergeFilteredTest("AllFiltered", makeVC("1", Arrays.asList(Aref, T), "FAIL"), makeVC("2", Arrays.asList(Aref, T), "FAIL"), @@ -317,6 +311,13 @@ public class VariantContextUtilsUnitTest extends BaseTest { VariantContextUtils.MERGE_FILTER_IN_ALL); // test ALL vs. ANY + new MergeFilteredTest("FailOneUnfiltered", + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "."), + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); + new MergeFilteredTest("OneFailAllUnfilteredArg", makeVC("1", Arrays.asList(Aref, T), "FAIL"), makeVC("2", Arrays.asList(Aref, T), "."), @@ -338,6 +339,18 @@ public class VariantContextUtilsUnitTest extends BaseTest { makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), VariantContextUtils.MERGE_INTERSECTION); + new MergeFilteredTest("RefInAll", + makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + VariantContextUtils.MERGE_REF_IN_ALL); + + new MergeFilteredTest("RefInOne", + makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + "2"); + return MergeFilteredTest.getTests(MergeFilteredTest.class); } From dab7232e9af623a3d085988bac751ee4df229748 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Sep 2011 17:26:11 -0400 Subject: [PATCH 05/18] simpleMerge UnitTest for not annotating and annotating to different info key --- .../VariantContextUtilsUnitTest.java | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 070fdaca4..77e4391a3 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -375,10 +375,26 @@ public class VariantContextUtilsUnitTest extends BaseTest { Assert.assertEquals(merged.getFilters(), cfg.expected.getFilters()); } + @Test + public void testAnnotationSet() { + for ( final boolean annotate : Arrays.asList(true, false)) { + for ( final String set : Arrays.asList("set", "combine", "x")) { + final List priority = Arrays.asList("1", "2"); + VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS); + VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS); + + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + Arrays.asList(vc1, vc2), priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.PRIORITIZE, annotate, false, set, false, false); + + if ( annotate ) + Assert.assertEquals(merged.getAttribute(set), VariantContextUtils.MERGE_INTERSECTION); + else + Assert.assertFalse(merged.hasAttribute(set)); + } + } + } // todo -- add tests for subset merging, especially with correct PLs - // todo -- test priority list: intersection, filtered in all, reference in all, X-filteredInX, X - // todo -- test FilteredRecordMergeType - // todo -- no annotate origin - // todo -- test set key + // todo -- test priority list } From a8e0fb26ea309a6620d540310f1224fb08ec7440 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 23 Sep 2011 07:33:20 -0400 Subject: [PATCH 06/18] Updating md5 because the file changed --- .../sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java index b4a8498e1..1b2a6e82e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java @@ -33,7 +33,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(b36KGReference, "symbolic_alleles_2.vcf"), 1, - Arrays.asList("6645babc8c7d46be0da223477c7b1291")); + Arrays.asList("3008d6f5044bc14801e5c58d985dec72")); executeTest("Test symbolic alleles mixed in with non-symbolic alleles", spec); } } From 4397ce8653eab67546e261faef5ebd5619057b79 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 08:23:46 -0400 Subject: [PATCH 08/18] Moved removePLs to VariantContextUtils --- .../sting/utils/variantcontext/Genotype.java | 7 ------- .../sting/utils/variantcontext/VariantContextUtils.java | 9 ++++++++- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index 2e233c18e..fd8c70abe 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -74,13 +74,6 @@ public class Genotype { return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased()); } - public static Genotype removePLs(Genotype g) { - Map attrs = new HashMap(g.getAttributes()); - attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY); - attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY); - return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased()); - } - public static Genotype modifyAlleles(Genotype g, List alleles) { return new Genotype(g.getSampleName(), alleles, g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased()); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 03fe969b5..93d4f6f5c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -159,6 +159,13 @@ public class VariantContextUtils { return "%." + precision + "f"; } + public static Genotype removePLs(Genotype g) { + Map attrs = new HashMap(g.getAttributes()); + attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY); + attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY); + return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased()); + } + /** * A simple but common wrapper for matching VariantContext objects using JEXL expressions */ @@ -740,7 +747,7 @@ public class VariantContextUtils { Map newGs = new HashMap(genotypes.size()); for ( Map.Entry g : genotypes.entrySet() ) { - newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? Genotype.removePLs(g.getValue()) : g.getValue()); + newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? removePLs(g.getValue()) : g.getValue()); } return newGs; From a9f073fa68f500fbad4e068b75e3ff924a0d7d66 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 08:24:49 -0400 Subject: [PATCH 09/18] Genotype merging unit tests for simpleMerge -- Remaining TODOs are all for GdA --- .../VariantContextUtilsUnitTest.java | 319 +++++++++++++----- 1 file changed, 234 insertions(+), 85 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 77e4391a3..c1025a7de 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -21,14 +21,8 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ - -// our package package org.broadinstitute.sting.utils.variantcontext; - -// the imports for unit testing. - - import net.sf.picard.reference.IndexedFastaSequenceFile; import org.apache.log4j.Priority; import org.broadinstitute.sting.BaseTest; @@ -47,7 +41,6 @@ import java.io.File; import java.io.FileNotFoundException; import java.util.*; - public class VariantContextUtilsUnitTest extends BaseTest { Allele Aref, T, C, delRef, ATC, ATCATC; private GenomeLocParser genomeLocParser; @@ -72,10 +65,22 @@ public class VariantContextUtilsUnitTest extends BaseTest { ATCATC = Allele.create("ATCATC"); } + private Genotype makeG(String sample, Allele a1, Allele a2) { + return new Genotype(sample, Arrays.asList(a1, a2)); + } + + private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError) { + return new Genotype(sample, Arrays.asList(a1, a2), log10pError); + } + private VariantContext makeVC(String source, List alleles) { return makeVC(source, alleles, null, null); } + private VariantContext makeVC(String source, List alleles, Genotype... g1) { + return makeVC(source, alleles, Arrays.asList(g1)); + } + private VariantContext makeVC(String source, List alleles, String filter) { return makeVC(source, alleles, filter.equals(".") ? null : new HashSet(Arrays.asList(filter))); } @@ -121,66 +126,66 @@ public class VariantContextUtilsUnitTest extends BaseTest { public Object[][] mergeAllelesData() { // first, do no harm new MergeAllelesTest(Arrays.asList(Aref), - Arrays.asList(Aref)); + Arrays.asList(Aref)); new MergeAllelesTest(Arrays.asList(Aref), - Arrays.asList(Aref), - Arrays.asList(Aref)); + Arrays.asList(Aref), + Arrays.asList(Aref)); new MergeAllelesTest(Arrays.asList(Aref), - Arrays.asList(Aref, T), - Arrays.asList(Aref, T)); + Arrays.asList(Aref, T), + Arrays.asList(Aref, T)); new MergeAllelesTest(Arrays.asList(Aref, C), - Arrays.asList(Aref, T), - Arrays.asList(Aref, C, T)); + Arrays.asList(Aref, T), + Arrays.asList(Aref, C, T)); new MergeAllelesTest(Arrays.asList(Aref, T), - Arrays.asList(Aref, C), - Arrays.asList(Aref, C, T)); // sorted by allele + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); // sorted by allele new MergeAllelesTest(Arrays.asList(Aref, C, T), - Arrays.asList(Aref, C), - Arrays.asList(Aref, C, T)); + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); new MergeAllelesTest(Arrays.asList(Aref, T, C), - Arrays.asList(Aref, C), - Arrays.asList(Aref, C, T)); // sorted by allele + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); // sorted by allele new MergeAllelesTest(Arrays.asList(delRef), - Arrays.asList(delRef)); // todo -- FIXME me GdA + Arrays.asList(delRef)); // todo -- FIXME me GdA new MergeAllelesTest(Arrays.asList(delRef), - Arrays.asList(delRef, ATC), - Arrays.asList(delRef, ATC)); + Arrays.asList(delRef, ATC), + Arrays.asList(delRef, ATC)); new MergeAllelesTest(Arrays.asList(delRef), - Arrays.asList(delRef, ATC, ATCATC), - Arrays.asList(delRef, ATC, ATCATC)); + Arrays.asList(delRef, ATC, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); new MergeAllelesTest(Arrays.asList(delRef, ATCATC), - Arrays.asList(delRef, ATC, ATCATC), - Arrays.asList(delRef, ATC, ATCATC)); + Arrays.asList(delRef, ATC, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); new MergeAllelesTest(Arrays.asList(delRef, ATC), - Arrays.asList(delRef, ATCATC), - Arrays.asList(delRef, ATC, ATCATC)); + Arrays.asList(delRef, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); return MergeAllelesTest.getTests(MergeAllelesTest.class); } @Test(dataProvider = "mergeAlleles") public void testMergeAlleles(MergeAllelesTest cfg) { - final List priority = new ArrayList(); final List inputs = new ArrayList(); int i = 0; for ( final List alleles : cfg.inputs ) { final String name = "vcf" + ++i; - priority.add(name); inputs.add(makeVC(name, alleles)); } + final List priority = vcs2priority(inputs); + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, inputs, priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, @@ -281,87 +286,82 @@ public class VariantContextUtilsUnitTest extends BaseTest { @DataProvider(name = "mergeFiltered") public Object[][] mergeFilteredData() { new MergeFilteredTest("AllPass", - makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - VariantContextUtils.MERGE_INTERSECTION); + makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + VariantContextUtils.MERGE_INTERSECTION); new MergeFilteredTest("noFilters", - makeVC("1", Arrays.asList(Aref, T), "."), - makeVC("2", Arrays.asList(Aref, T), "."), - makeVC("3", Arrays.asList(Aref, T), "."), - VariantContextUtils.MERGE_INTERSECTION); + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "."), + VariantContextUtils.MERGE_INTERSECTION); new MergeFilteredTest("oneFiltered", - makeVC("1", Arrays.asList(Aref, T), "."), - makeVC("2", Arrays.asList(Aref, T), "FAIL"), - makeVC("3", Arrays.asList(Aref, T), "."), - String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), "."), + String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); new MergeFilteredTest("onePassOneFail", - makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - makeVC("2", Arrays.asList(Aref, T), "FAIL"), - makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); new MergeFilteredTest("AllFiltered", - makeVC("1", Arrays.asList(Aref, T), "FAIL"), - makeVC("2", Arrays.asList(Aref, T), "FAIL"), - makeVC("3", Arrays.asList(Aref, T), "FAIL"), - VariantContextUtils.MERGE_FILTER_IN_ALL); + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), "FAIL"), + VariantContextUtils.MERGE_FILTER_IN_ALL); // test ALL vs. ANY new MergeFilteredTest("FailOneUnfiltered", - makeVC("1", Arrays.asList(Aref, T), "FAIL"), - makeVC("2", Arrays.asList(Aref, T), "."), - makeVC("3", Arrays.asList(Aref, T), "."), - VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, - String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "."), + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); new MergeFilteredTest("OneFailAllUnfilteredArg", - makeVC("1", Arrays.asList(Aref, T), "FAIL"), - makeVC("2", Arrays.asList(Aref, T), "."), - makeVC("3", Arrays.asList(Aref, T), "FAIL"), - VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED, - String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "FAIL"), + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED, + String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); // test excluding allele in filtered record new MergeFilteredTest("DontIncludeAlleleOfFilteredRecords", - makeVC("1", Arrays.asList(Aref, T), "."), - makeVC("2", Arrays.asList(Aref, T), "FAIL"), - makeVC("3", Arrays.asList(Aref, T), "."), - String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), "."), + String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); // promotion of site from unfiltered to PASSES new MergeFilteredTest("UnfilteredPlusPassIsPass", - makeVC("1", Arrays.asList(Aref, T), "."), - makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - VariantContextUtils.MERGE_INTERSECTION); + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + VariantContextUtils.MERGE_INTERSECTION); new MergeFilteredTest("RefInAll", - makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), - makeVC("2", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), - makeVC("3", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), - VariantContextUtils.MERGE_REF_IN_ALL); + makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + VariantContextUtils.MERGE_REF_IN_ALL); new MergeFilteredTest("RefInOne", - makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), - makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - "2"); + makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + "2"); return MergeFilteredTest.getTests(MergeFilteredTest.class); } @Test(dataProvider = "mergeFiltered") public void testMergeFiltered(MergeFilteredTest cfg) { - final List priority = new ArrayList(); - - for ( final VariantContext vc : cfg.inputs ) { - priority.add(vc.getSource()); - } - + final List priority = vcs2priority(cfg.inputs); final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, cfg.inputs, priority, cfg.type, VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); @@ -375,6 +375,148 @@ public class VariantContextUtilsUnitTest extends BaseTest { Assert.assertEquals(merged.getFilters(), cfg.expected.getFilters()); } + // -------------------------------------------------------------------------------- + // + // Test genotype merging + // + // -------------------------------------------------------------------------------- + + private class MergeGenotypesTest extends TestDataProvider { + List inputs; + VariantContext expected; + List priority; + + private MergeGenotypesTest(String name, String priority, VariantContext... arg) { + super(MergeGenotypesTest.class, name); + LinkedList all = new LinkedList(Arrays.asList(arg)); + this.expected = all.pollLast(); + inputs = all; + this.priority = Arrays.asList(priority.split(",")); + } + + public String toString() { + return String.format("%s input=%s expected=%s", super.toString(), inputs, expected); + } + } + + @DataProvider(name = "mergeGenotypes") + public Object[][] mergeGenotypesData() { + new MergeGenotypesTest("TakeGenotypeByPriority-1,2", "1,2", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1))); + + new MergeGenotypesTest("TakeGenotypeByPriority-1,2-nocall", "1,2", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1))); + + new MergeGenotypesTest("TakeGenotypeByPriority-2,1", "2,1", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2))); + + new MergeGenotypesTest("NonOverlappingGenotypes", "1,2", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s2", Aref, T, 2)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1), makeG("s2", Aref, T, 2))); + + new MergeGenotypesTest("PreserveNoCall", "1,2", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s2", Aref, T, 2)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1), makeG("s2", Aref, T, 2))); + + new MergeGenotypesTest("PerserveAlleles", "1,2", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)), + makeVC("2", Arrays.asList(Aref, C), makeG("s2", Aref, C, 2)), + makeVC("3", Arrays.asList(Aref, C, T), makeG("s1", Aref, T, 1), makeG("s2", Aref, C, 2))); + + new MergeGenotypesTest("TakeGenotypePartialOverlap-1,2", "1,2", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1), makeG("s3", Aref, T, 3))); + + new MergeGenotypesTest("TakeGenotypePartialOverlap-2,1", "2,1", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3))); + + // todo -- GDA -- add tests for merging correct PLs + + return MergeGenotypesTest.getTests(MergeGenotypesTest.class); + } + + // todo -- add test for GenotypeMergeType UNIQUIFY, REQUIRE_UNIQUE + + @Test(dataProvider = "mergeGenotypes") + public void testMergeGenotypes(MergeGenotypesTest cfg) { + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + cfg.inputs, cfg.priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); + + // test alleles are equal + Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles()); + + // test genotypes + assertGenotypesAreMostlyEqual(merged.getGenotypes(), cfg.expected.getGenotypes()); + } + + // necessary to not overload equals for genotypes + private void assertGenotypesAreMostlyEqual(Map actual, Map expected) { + if (actual == expected) { + return; + } + + if (actual == null || expected == null) { + Assert.fail("Maps not equal: expected: " + expected + " and actual: " + actual); + } + + if (actual.size() != expected.size()) { + Assert.fail("Maps do not have the same size:" + actual.size() + " != " + expected.size()); + } + + for (Map.Entry entry : actual.entrySet()) { + String key = entry.getKey(); + Genotype value = entry.getValue(); + Genotype expectedValue = expected.get(key); + + Assert.assertEquals(value.alleles, expectedValue.alleles); + Assert.assertEquals(value.getNegLog10PError(), expectedValue.getNegLog10PError()); + Assert.assertEquals(value.hasLikelihoods(), expectedValue.hasLikelihoods()); + if ( value.hasLikelihoods() ) + Assert.assertEquals(value.getLikelihoods(), expectedValue.getLikelihoods()); + } + } + + @Test + public void testMergeGenotypesUniquify() { + final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)); + final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)); + + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false); + + // test genotypes + Assert.assertEquals(merged.getGenotypes().keySet(), new HashSet(Arrays.asList("s1.1", "s1.2"))); + } + + @Test(expectedExceptions = UserException.class) + public void testMergeGenotypesRequireUnique() { + final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)); + final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)); + + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE, false, false, "set", false, false); + } + + // -------------------------------------------------------------------------------- + // + // Misc. tests + // + // -------------------------------------------------------------------------------- + @Test public void testAnnotationSet() { for ( final boolean annotate : Arrays.asList(true, false)) { @@ -395,6 +537,13 @@ public class VariantContextUtilsUnitTest extends BaseTest { } } - // todo -- add tests for subset merging, especially with correct PLs - // todo -- test priority list + private static final List vcs2priority(final Collection vcs) { + final List priority = new ArrayList(); + + for ( final VariantContext vc : vcs ) { + priority.add(vc.getSource()); + } + + return priority; + } } From 106a26c42da8780cd33d7186ec0a658830ad6819 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 08:25:20 -0400 Subject: [PATCH 10/18] Minor file cleanup --- .../utils/variantcontext/VariantContextUtilsUnitTest.java | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index c1025a7de..15a5549b2 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -24,18 +24,14 @@ package org.broadinstitute.sting.utils.variantcontext; import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.apache.log4j.Priority; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.testng.Assert; import org.testng.annotations.BeforeSuite; -import org.testng.annotations.Test; import org.testng.annotations.DataProvider; -import org.yaml.snakeyaml.Yaml; +import org.testng.annotations.Test; import java.io.File; import java.io.FileNotFoundException; @@ -446,8 +442,6 @@ public class VariantContextUtilsUnitTest extends BaseTest { return MergeGenotypesTest.getTests(MergeGenotypesTest.class); } - // todo -- add test for GenotypeMergeType UNIQUIFY, REQUIRE_UNIQUE - @Test(dataProvider = "mergeGenotypes") public void testMergeGenotypes(MergeGenotypesTest cfg) { final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, From dfce301bebd62275ad55d326dd2801cbbce47d81 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 09:03:04 -0400 Subject: [PATCH 11/18] Looks for @Hidden annotation on all classes and excludes them from the docs --- .../sting/utils/help/GenericDocumentationHandler.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java index 4f1e95499..7ea77a939 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java @@ -66,7 +66,8 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { public boolean includeInDocs(ClassDoc doc) { try { Class type = HelpUtils.getClassForDoc(doc); - return JVMUtils.isConcrete(type); + boolean hidden = ! getDoclet().showHiddenFeatures() && type.isAnnotationPresent(Hidden.class); + return ! hidden && JVMUtils.isConcrete(type); } catch ( ClassNotFoundException e ) { return false; } From dd65ba5baee8df3b57819e684dd3b63843809836 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 09:03:37 -0400 Subject: [PATCH 12/18] @Hidden for DocumentationTest and GATKDocsExample --- .../org/broadinstitute/sting/gatk/examples/GATKDocsExample.java | 2 ++ .../broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java | 1 + 2 files changed, 3 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java index 4541a0537..db4f477c2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.examples; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.ArgumentCollection; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -59,6 +60,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; * @author Your Name * @since Date created */ +@Hidden public class GATKDocsExample extends RodWalker { /** * Put detailed documentation about the argument here. No need to duplicate the summary information diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java index 933e24784..5b60a9db5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java @@ -42,6 +42,7 @@ import java.util.*; * *

Body test

*/ +@Hidden public class DocumentationTest extends RodWalker { // the docs for the arguments are in the collection @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); From 2bb77a7978881db4ff1d0b57fa7e2afd9a271295 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 09:04:16 -0400 Subject: [PATCH 14/18] Docs for all VariantAnnotator annotations --- .../gatk/walkers/annotator/AlleleBalance.java | 3 + .../annotator/AlleleBalanceBySample.java | 3 + .../walkers/annotator/AnnotationByDepth.java | 5 +- .../gatk/walkers/annotator/BaseCounts.java | 3 + .../annotator/BaseQualityRankSumTest.java | 3 + .../walkers/annotator/ChromosomeCounts.java | 5 + .../walkers/annotator/DepthOfCoverage.java | 7 +- .../annotator/DepthPerAlleleBySample.java | 19 + .../gatk/walkers/annotator/FisherStrand.java | 5 + .../gatk/walkers/annotator/GCContent.java | 3 + .../walkers/annotator/HaplotypeScore.java | 4 + .../gatk/walkers/annotator/HardyWeinberg.java | 3 + .../walkers/annotator/HomopolymerRun.java | 4 +- .../{GLstats.java => InbreedingCoeff.java} | 15 +- .../gatk/walkers/annotator/IndelType.java | 6 +- .../sting/gatk/walkers/annotator/LowMQ.java | 3 + .../annotator/MappingQualityRankSumTest.java | 3 + .../walkers/annotator/MappingQualityZero.java | 3 + .../annotator/MappingQualityZeroBySample.java | 166 ++++--- .../annotator/MappingQualityZeroFraction.java | 5 +- .../gatk/walkers/annotator/NBaseCount.java | 5 +- .../gatk/walkers/annotator/QualByDepth.java | 7 +- .../walkers/annotator/RMSMappingQuality.java | 3 + .../gatk/walkers/annotator/RankSumTest.java | 4 +- .../ReadDepthAndAllelicFractionBySample.java | 416 +++++++++--------- .../walkers/annotator/ReadPosRankSumTest.java | 5 +- .../gatk/walkers/annotator/SBByDepth.java | 5 +- .../gatk/walkers/annotator/SampleList.java | 4 +- .../walkers/annotator/SpanningDeletions.java | 3 + .../annotator/TechnologyComposition.java | 8 +- 30 files changed, 398 insertions(+), 330 deletions(-) rename public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/{GLstats.java => InbreedingCoeff.java} (90%) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index cf68a9121..e501258c5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -43,6 +43,9 @@ import java.util.List; import java.util.Map; +/** + * The allele balance (fraction of ref bases over ref + alt bases) across all bialleleic het-called samples + */ public class AlleleBalance extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java index ddb7ab828..75c4037d5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java @@ -16,6 +16,9 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; +/** + * The allele balance (fraction of ref bases over ref + alt bases) separately for each bialleleic het-called sample + */ public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java index dc41dbc81..353fd1c2c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java @@ -6,8 +6,9 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype; import java.util.Map; - - +/** + * Abstract base class for all annotations that are normalized by depth + */ public abstract class AnnotationByDepth extends InfoFieldAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java index ecfd9b707..46aa6d0f3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java @@ -47,6 +47,9 @@ import java.util.List; import java.util.Map; +/** + * Count of A, C, G, T bases across all samples + */ public class BaseCounts extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java index 2a5c996f7..6cab6d95f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java @@ -13,6 +13,9 @@ import java.util.LinkedHashMap; import java.util.List; +/** + * The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele) + */ public class BaseQualityRankSumTest extends RankSumTest { public List getKeyNames() { return Arrays.asList("BaseQRankSum"); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index ad06dcf52..5ed2a6761 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -44,6 +44,11 @@ import java.util.List; import java.util.Map; +/** + * Allele count in genotypes, for each ALT allele, in the same order as listed; + * allele Frequency, for each ALT allele, in the same order as listed; total number + * of alleles in called genotypes. + */ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation { private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index a4d8db5bd..8826de232 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -16,7 +16,12 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - +/** + * Total (unfiltered) depth over all samples. + * + * Affected by downsampling (-dcov) though, so the max value one can obtain for N samples with -dcov D + * is N * D + */ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 1652c8de7..1cd30c51d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -23,6 +23,25 @@ import java.util.List; import java.util.Map; +/** + * The depth of coverage of each VCF allele in this sample + * + * Complementary fields that two important ways of thinking about the depth of the data for this sample + * at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal + * quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site. + * The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the + * REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the + * power I have to determine the genotype of the sample at this site, while the AD tells me how many times + * I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering + * the reads. If, for example, I believe there really is a an A/T polymorphism at a site, then I would like + * to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would + * normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that + * the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that + * are actually present and correctly left-aligned in the alignments themselves). Because of this fact and + * because the AD includes reads and bases that were filtered by the Unified Genotyper, one should not base + * assumptions about the underlying genotype based on it; instead, the genotype likelihoods (PLs) are what + * determine the genotype calls (see below). + */ public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation { private static String REF_ALLELE = "REF"; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index 0cfca48fa..393eb549c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -43,6 +43,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; +/** + * Phred-scaled p-value using Fisher's Exact Test to detect strand bias (the variation + * being seen on only the forward or only the reverse strand) in the reads? More bias is + * indicative of false positive calls. + */ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation { private static final String FS = "FS"; private static final double MIN_PVALUE = 1E-320; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java index a46473f60..11a64b49f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java @@ -17,6 +17,9 @@ import java.util.List; import java.util.Map; +/** + * The GC content (# GC bases / # all bases) of the reference within 50 bp +/- this site + */ public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index 9af3b8e8e..df6da3b85 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -49,6 +49,10 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; +/** + * Consistency of the site with two (and only two) segregating haplotypes. Higher scores + * are indicative of regions with bad alignments, often leading to artifactual SNP and indel calls. + */ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation { private final static boolean DEBUG = false; private final static int MIN_CONTEXT_WING_SIZE = 10; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java index 045505698..f068ed895 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java @@ -19,6 +19,9 @@ import java.util.List; import java.util.Map; +/** + * Phred-scaled P value of genotype-based (using GT field) test for Hardy-Weinberg test for disequilibrium + */ public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation { private static final int MIN_SAMPLES = 10; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index 463f7a645..197a00243 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -16,7 +16,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - +/** + * Largest contiguous homopolymer run of the variant allele in either direction on the reference. + */ public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnotation { private boolean ANNOTATE_INDELS = true; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java similarity index 90% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java index 5295d6d21..a14007147 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java @@ -17,14 +17,15 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: 5/16/11 - */ -// A set of annotations calculated directly from the GLs -public class GLstats extends InfoFieldAnnotation implements StandardAnnotation { +/** + * Likelihood-based (using PL field) test for the inbreeding among samples. + * + * A continuous generalization of the Hardy-Weinberg test for disequilibrium that works + * well with limited coverage per sample. See the 1000 Genomes Phase I release for + * more information. + */ +public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation { private static final int MIN_SAMPLES = 10; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java index bfede40d2..e0abfcf3c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java @@ -14,11 +14,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; /** - * Created by IntelliJ IDEA. - * User: delangel - * Date: Mar 11, 2011 - * Time: 11:47:33 AM - * To change this template use File | Settings | File Templates. + * Rough category of indel type (insertion, deletion, multi-allelic, other) */ public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java index 09ffe0fb6..753740258 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java @@ -17,6 +17,9 @@ import java.util.List; import java.util.Map; +/** + * Triplet annotation: fraction of MAQP == 0, MAPQ < 10, and count of all mapped reads + */ public class LowMQ extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java index cc62580a9..157c615d7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java @@ -14,6 +14,9 @@ import java.util.LinkedHashMap; import java.util.List; +/** + * The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele) + */ public class MappingQualityRankSumTest extends RankSumTest { public List getKeyNames() { return Arrays.asList("MQRankSum"); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java index f9caae227..3a3efc4e8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java @@ -19,6 +19,9 @@ import java.util.List; import java.util.Map; +/** + * Total count across all samples of mapping quality zero reads + */ public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java index 3d234a1e3..f14d7a8a5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java @@ -1,85 +1,81 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.annotator; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * Created by IntelliJ IDEA. - * User: asivache - * Date: Feb 4, 2011 - * Time: 6:46:25 PM - * To change this template use File | Settings | File Templates. - */ -public class MappingQualityZeroBySample extends GenotypeAnnotation { - public Map annotate(RefMetaDataTracker tracker, - AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) { - if ( g == null || !g.isCalled() ) - return null; - - int mq0 = 0; - ReadBackedPileup pileup = null; - if (vc.isIndel() && context.hasExtendedEventPileup()) - pileup = context.getExtendedEventPileup(); - else if (context.hasBasePileup()) - pileup = context.getBasePileup(); - else return null; - - if (pileup != null) { - for (PileupElement p : pileup ) { - if ( p.getMappingQual() == 0 ) - mq0++; - } - } - Map map = new HashMap(); - map.put(getKeyNames().get(0), String.format("%d", mq0)); - return map; - } - - public List getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); } - - public List getDescriptions() { return Arrays.asList( - new VCFFormatHeaderLine(getKeyNames().get(0), 1, - VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample")); } - - -} +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Count for each sample of mapping quality zero reads + */ +public class MappingQualityZeroBySample extends GenotypeAnnotation { + public Map annotate(RefMetaDataTracker tracker, + AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) { + if ( g == null || !g.isCalled() ) + return null; + + int mq0 = 0; + ReadBackedPileup pileup = null; + if (vc.isIndel() && context.hasExtendedEventPileup()) + pileup = context.getExtendedEventPileup(); + else if (context.hasBasePileup()) + pileup = context.getBasePileup(); + else return null; + + if (pileup != null) { + for (PileupElement p : pileup ) { + if ( p.getMappingQual() == 0 ) + mq0++; + } + } + Map map = new HashMap(); + map.put(getKeyNames().get(0), String.format("%d", mq0)); + return map; + } + + public List getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); } + + public List getDescriptions() { return Arrays.asList( + new VCFFormatHeaderLine(getKeyNames().get(0), 1, + VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample")); } + + +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java index 3e8fe8998..2164537b8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java @@ -17,8 +17,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - - +/** + * Fraction of all reads across samples that have mapping quality zero + */ public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java index 74c562045..891e9ae56 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java @@ -17,11 +17,8 @@ import java.util.List; import java.util.Map; /** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: 5/16/11 + * The number of N bases, counting only SOLiD data */ - public class NBaseCount extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index 9a292c39a..552289309 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -15,7 +16,11 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - +/** + * Variant confidence (given as (AB+BB)/AA from the PLs) / unfiltered depth. + * + * Low scores are indicative of false positive calls and artifacts. + */ public class QualByDepth extends AnnotationByDepth implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index 668129888..40f6d20d3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -21,6 +21,9 @@ import java.util.List; import java.util.Map; +/** + * Root Mean Square of the mapping quality of the reads across all samples. + */ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index 52c704055..93e093248 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -21,7 +21,9 @@ import java.util.List; import java.util.Map; - +/** + * Abstract root for all RankSum based annotations + */ public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation { static final double INDEL_LIKELIHOOD_THRESH = 0.1; static final boolean DEBUG = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java index 26ca08380..772541eb6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java @@ -1,209 +1,207 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.annotator; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; -import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * Created by IntelliJ IDEA. - * User: asivache - * Date: Feb 4, 2011 - * Time: 3:59:27 PM - * To change this template use File | Settings | File Templates. - */ -public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation { - - private static String REF_ALLELE = "REF"; - - private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time - - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, - AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { - if ( g == null || !g.isCalled() ) - return null; - - if ( vc.isSNP() ) - return annotateSNP(stratifiedContext, vc); - if ( vc.isIndel() ) - return annotateIndel(stratifiedContext, vc); - - return null; - } - - private Map annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) { - - if ( ! stratifiedContext.hasBasePileup() ) return null; - - HashMap alleleCounts = new HashMap(); - for ( Allele allele : vc.getAlternateAlleles() ) - alleleCounts.put(allele.getBases()[0], 0); - - ReadBackedPileup pileup = stratifiedContext.getBasePileup(); - int totalDepth = pileup.size(); - - Map map = new HashMap(); - map.put(getKeyNames().get(0), totalDepth); // put total depth in right away - - if ( totalDepth == 0 ) return map; // done, can not compute FA at 0 coverage!! - - int mq0 = 0; // number of "ref" reads that are acually mq0 - for ( PileupElement p : pileup ) { - if ( p.getMappingQual() == 0 ) { - mq0++; - continue; - } - if ( alleleCounts.containsKey(p.getBase()) ) // non-mq0 read and it's an alt - alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+1); - } - - if ( mq0 == totalDepth ) return map; // if all reads are mq0, there is nothing left to do - - // we need to add counts in the correct order - String[] fracs = new String[alleleCounts.size()]; - for (int i = 0; i < vc.getAlternateAlleles().size(); i++) { - fracs[i] = String.format("%.3f", ((float)alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]))/(totalDepth-mq0)); - } - - map.put(getKeyNames().get(1), fracs); - return map; - } - - private Map annotateIndel(AlignmentContext - stratifiedContext, VariantContext - vc) { - - if ( ! stratifiedContext.hasExtendedEventPileup() ) { - return null; - } - - ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup(); - if ( pileup == null ) - return null; - int totalDepth = pileup.size(); - - Map map = new HashMap(); - map.put(getKeyNames().get(0), totalDepth); // put total depth in right away - - if ( totalDepth == 0 ) return map; - int mq0 = 0; // number of "ref" reads that are acually mq0 - - HashMap alleleCounts = new HashMap(); - Allele refAllele = vc.getReference(); - - for ( Allele allele : vc.getAlternateAlleles() ) { - - if ( allele.isNoCall() ) { - continue; // this does not look so good, should we die??? - } - - alleleCounts.put(getAlleleRepresentation(allele), 0); - } - - for ( ExtendedEventPileupElement e : pileup.toExtendedIterable() ) { - - if ( e.getMappingQual() == 0 ) { - mq0++; - continue; - } - - if ( e.isInsertion() ) { - - final String b = e.getEventBases(); - if ( alleleCounts.containsKey(b) ) { - alleleCounts.put(b, alleleCounts.get(b)+1); - } - - } else { - if ( e.isDeletion() ) { - if ( e.getEventLength() == refAllele.length() ) { - // this is indeed the deletion allele recorded in VC - final String b = DEL; - if ( alleleCounts.containsKey(b) ) { - alleleCounts.put(b, alleleCounts.get(b)+1); - } - } -// else { -// System.out.print(" deletion of WRONG length found"); -// } - } - } - } - - if ( mq0 == totalDepth ) return map; - - String[] fracs = new String[alleleCounts.size()]; - for (int i = 0; i < vc.getAlternateAlleles().size(); i++) - fracs[i] = String.format("%.3f", - ((float)alleleCounts.get(getAlleleRepresentation(vc.getAlternateAllele(i))))/(totalDepth-mq0)); - - map.put(getKeyNames().get(1), fracs); - - //map.put(getKeyNames().get(0), counts); - return map; - } - - private String getAlleleRepresentation(Allele allele) { - if ( allele.isNull() ) { // deletion wrt the ref - return DEL; - } else { // insertion, pass actual bases - return allele.getBaseString(); - } - - } - - // public String getIndelBases() - public List getKeyNames() { return Arrays.asList("DP","FA"); } - - public List getDescriptions() { - return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), - 1, - VCFHeaderLineType.Integer, - "Total read depth per sample, including MQ0"), - new VCFFormatHeaderLine(getKeyNames().get(1), - VCFHeaderLineCount.UNBOUNDED, - VCFHeaderLineType.Float, - "Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample")); - } -} +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Unsupported + */ +@Hidden +public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation { + + private static String REF_ALLELE = "REF"; + + private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time + + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, + AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { + if ( g == null || !g.isCalled() ) + return null; + + if ( vc.isSNP() ) + return annotateSNP(stratifiedContext, vc); + if ( vc.isIndel() ) + return annotateIndel(stratifiedContext, vc); + + return null; + } + + private Map annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) { + + if ( ! stratifiedContext.hasBasePileup() ) return null; + + HashMap alleleCounts = new HashMap(); + for ( Allele allele : vc.getAlternateAlleles() ) + alleleCounts.put(allele.getBases()[0], 0); + + ReadBackedPileup pileup = stratifiedContext.getBasePileup(); + int totalDepth = pileup.size(); + + Map map = new HashMap(); + map.put(getKeyNames().get(0), totalDepth); // put total depth in right away + + if ( totalDepth == 0 ) return map; // done, can not compute FA at 0 coverage!! + + int mq0 = 0; // number of "ref" reads that are acually mq0 + for ( PileupElement p : pileup ) { + if ( p.getMappingQual() == 0 ) { + mq0++; + continue; + } + if ( alleleCounts.containsKey(p.getBase()) ) // non-mq0 read and it's an alt + alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+1); + } + + if ( mq0 == totalDepth ) return map; // if all reads are mq0, there is nothing left to do + + // we need to add counts in the correct order + String[] fracs = new String[alleleCounts.size()]; + for (int i = 0; i < vc.getAlternateAlleles().size(); i++) { + fracs[i] = String.format("%.3f", ((float)alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]))/(totalDepth-mq0)); + } + + map.put(getKeyNames().get(1), fracs); + return map; + } + + private Map annotateIndel(AlignmentContext + stratifiedContext, VariantContext + vc) { + + if ( ! stratifiedContext.hasExtendedEventPileup() ) { + return null; + } + + ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup(); + if ( pileup == null ) + return null; + int totalDepth = pileup.size(); + + Map map = new HashMap(); + map.put(getKeyNames().get(0), totalDepth); // put total depth in right away + + if ( totalDepth == 0 ) return map; + int mq0 = 0; // number of "ref" reads that are acually mq0 + + HashMap alleleCounts = new HashMap(); + Allele refAllele = vc.getReference(); + + for ( Allele allele : vc.getAlternateAlleles() ) { + + if ( allele.isNoCall() ) { + continue; // this does not look so good, should we die??? + } + + alleleCounts.put(getAlleleRepresentation(allele), 0); + } + + for ( ExtendedEventPileupElement e : pileup.toExtendedIterable() ) { + + if ( e.getMappingQual() == 0 ) { + mq0++; + continue; + } + + if ( e.isInsertion() ) { + + final String b = e.getEventBases(); + if ( alleleCounts.containsKey(b) ) { + alleleCounts.put(b, alleleCounts.get(b)+1); + } + + } else { + if ( e.isDeletion() ) { + if ( e.getEventLength() == refAllele.length() ) { + // this is indeed the deletion allele recorded in VC + final String b = DEL; + if ( alleleCounts.containsKey(b) ) { + alleleCounts.put(b, alleleCounts.get(b)+1); + } + } +// else { +// System.out.print(" deletion of WRONG length found"); +// } + } + } + } + + if ( mq0 == totalDepth ) return map; + + String[] fracs = new String[alleleCounts.size()]; + for (int i = 0; i < vc.getAlternateAlleles().size(); i++) + fracs[i] = String.format("%.3f", + ((float)alleleCounts.get(getAlleleRepresentation(vc.getAlternateAllele(i))))/(totalDepth-mq0)); + + map.put(getKeyNames().get(1), fracs); + + //map.put(getKeyNames().get(0), counts); + return map; + } + + private String getAlleleRepresentation(Allele allele) { + if ( allele.isNull() ) { // deletion wrt the ref + return DEL; + } else { // insertion, pass actual bases + return allele.getBaseString(); + } + + } + + // public String getIndelBases() + public List getKeyNames() { return Arrays.asList("DP","FA"); } + + public List getDescriptions() { + return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), + 1, + VCFHeaderLineType.Integer, + "Total read depth per sample, including MQ0"), + new VCFFormatHeaderLine(getKeyNames().get(1), + VCFHeaderLineCount.UNBOUNDED, + VCFHeaderLineType.Float, + "Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample")); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java index aabfb2970..27a9306d4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java @@ -19,11 +19,8 @@ import java.util.LinkedHashMap; import java.util.List; /** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: 3/30/11 + * The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele; if the alternate allele is only seen near the ends of reads this is indicative of error). */ - public class ReadPosRankSumTest extends RankSumTest { public List getKeyNames() { return Arrays.asList("ReadPosRankSum"); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java index 180bed24d..efe96f226 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java @@ -15,8 +15,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - - +/** + * SB annotation value by depth of alt containing samples + */ public class SBByDepth extends AnnotationByDepth { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java index cd396036f..ff409484d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java @@ -41,7 +41,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - +/** + * List all of the samples in the info field + */ public class SampleList extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index 42203824f..f747fbc2e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -17,6 +17,9 @@ import java.util.List; import java.util.Map; +/** + * Fraction of reads containing spanning deletions at this site. + */ public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java index fa48c57a3..1f5508f4c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -19,12 +20,9 @@ import java.util.List; import java.util.Map; /** - * Created by IntelliJ IDEA. - * User: delangel - * Date: 6/29/11 - * Time: 3:14 PM - * To change this template use File | Settings | File Templates. + * Counts of bases from SLX, 454, and SOLiD at this site */ +@Hidden public class TechnologyComposition extends InfoFieldAnnotation implements ExperimentalAnnotation { private String nSLX = "NumSLX"; private String n454 ="Num454"; From e3d4efb2834b6aa5049a36b3e7ad4ca9458140d2 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 11:55:21 -0400 Subject: [PATCH 16/18] Remove N2 EXACT model code, which should never be used --- .../genotyper/ExactAFCalculationModel.java | 251 +----------------- .../genotyper/UnifiedArgumentCollection.java | 5 - 2 files changed, 9 insertions(+), 247 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 6ae437b27..8a3a97823 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -48,27 +48,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // code for testing purposes // private final static boolean DEBUG = false; - private final static boolean PRINT_LIKELIHOODS = false; - private final static int N_CYCLES = 1; - private SimpleTimer timerExpt = new SimpleTimer("linearExactBanded"); - private SimpleTimer timerGS = new SimpleTimer("linearExactGS"); - private final static boolean COMPARE_TO_GS = false; - - public enum ExactCalculation { - N2_GOLD_STANDARD, - LINEAR_EXPERIMENTAL - } - private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 - - private boolean SIMPLE_GREEDY_GENOTYPER = false; - + private final boolean SIMPLE_GREEDY_GENOTYPER = false; private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. - final private ExactCalculation calcToUse; protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { super(UAC, N, logger, verboseWriter); - calcToUse = UAC.EXACT_CALCULATION_TYPE; } public void getLog10PNonRef(RefMetaDataTracker tracker, @@ -76,43 +61,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { Map GLs, Setalleles, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors) { - // todo -- REMOVE ME AFTER TESTING - // todo -- REMOVE ME AFTER TESTING - // todo -- REMOVE ME AFTER TESTING - double[] gsPosteriors; - if ( COMPARE_TO_GS ) // due to annoying special values in incoming array, we have to clone up here - gsPosteriors = log10AlleleFrequencyPosteriors.clone(); - - int idxAA = GenotypeType.AA.ordinal(); - int idxAB = GenotypeType.AB.ordinal(); - int idxBB = GenotypeType.BB.ordinal(); - - // todo -- remove me after testing - if ( N_CYCLES > 1 ) { - for ( int i = 0; i < N_CYCLES; i++) { - timerGS.restart(); - linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone(), idxAA, idxAB, idxBB); - timerGS.stop(); - - timerExpt.restart(); - linearExactBanded(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone()); - timerExpt.stop(); - } - - System.out.printf("good = %.2f, expt = %.2f, delta = %.2f%n", - timerGS.getElapsedTime(), timerExpt.getElapsedTime(), timerExpt.getElapsedTime()-timerGS.getElapsedTime()); - } - - int lastK = -1; - - int numAlleles = alleles.size(); + final int numAlleles = alleles.size(); + final double[][] posteriorCache = numAlleles > 2 ? new double[numAlleles-1][] : null; + final double[] bestAFguess = numAlleles > 2 ? new double[numAlleles-1] : null; int idxDiag = numAlleles; int incr = numAlleles - 1; - - double[][] posteriorCache = new double[numAlleles-1][]; - double[] bestAFguess = new double[numAlleles-1]; - for (int k=1; k < numAlleles; k++) { // multi-allelic approximation, part 1: Ideally // for each alt allele compute marginal (suboptimal) posteriors - @@ -121,24 +75,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC. // 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD - idxAA = 0; - idxAB = k; + final int idxAA = 0; + final int idxAB = k; // yy is always element on the diagonal. // 2 alleles: BBelement 2 // 3 alleles: BB element 3. CC element 5 // 4 alleles: - idxBB = idxDiag; + final int idxBB = idxDiag; idxDiag += incr--; - // todo - possible cleanup - switch ( calcToUse ) { - case N2_GOLD_STANDARD: - lastK = gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB); - break; - case LINEAR_EXPERIMENTAL: - lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB); - break; - } + final int lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB); + if (numAlleles > 2) { posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone(); bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors); @@ -153,39 +100,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]); } - // todo -- REMOVE ME AFTER TESTING - // todo -- REMOVE ME AFTER TESTING - // todo -- REMOVE ME AFTER TESTING - if ( COMPARE_TO_GS ) { - gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, gsPosteriors, idxAA, idxAB, idxBB); - - double log10thisPVar = Math.log10(MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors)[0]); - double log10gsPVar = Math.log10(MathUtils.normalizeFromLog10(gsPosteriors)[0]); - boolean eq = (log10thisPVar == Double.NEGATIVE_INFINITY && log10gsPVar == Double.NEGATIVE_INFINITY) || MathUtils.compareDoubles(log10thisPVar, log10gsPVar, 1e-4) == 0; - - if ( ! eq || PRINT_LIKELIHOODS ) { - System.out.printf("----------------------------------------%n"); - for (int k=0; k < log10AlleleFrequencyPosteriors.length; k++) { - double x = log10AlleleFrequencyPosteriors[k]; - System.out.printf(" %d\t%.2f\t%.2f\t%b%n", k, - x < -1e10 ? Double.NEGATIVE_INFINITY : x, gsPosteriors[k], - log10AlleleFrequencyPosteriors[k] == gsPosteriors[k]); - } - System.out.printf("MAD_AC\t%d\t%d\t%.2f\t%.2f\t%.6f%n", - ref.getLocus().getStart(), lastK, log10thisPVar, log10gsPVar, log10thisPVar - log10gsPVar); - } - } - } private static final ArrayList getGLs(Map GLs) { ArrayList genotypeLikelihoods = new ArrayList(); - //int j = 0; genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy for ( Genotype sample : GLs.values() ) { if ( sample.hasLikelihoods() ) { - //double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods()); double[] gls = sample.getLikelihoods().getAsVector(); if (MathUtils.sum(gls) < SUM_GL_THRESH_NOCALL) @@ -240,84 +162,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } } - // now with banding - public int linearExactBanded(Map GLs, - double[] log10AlleleFrequencyPriors, - double[] log10AlleleFrequencyPosteriors) { - throw new NotImplementedException(); -// final int numSamples = GLs.size(); -// final int numChr = 2*numSamples; -// final double[][] genotypeLikelihoods = getGLs(GLs); -// -// final ExactACCache logY = new ExactACCache(numSamples+1); -// logY.getkMinus0()[0] = 0.0; // the zero case -// -// double maxLog10L = Double.NEGATIVE_INFINITY; -// boolean done = false; -// int lastK = -1; -// final int BAND_SIZE = 10; -// -// for (int k=0; k <= numChr && ! done; k++ ) { -// final double[] kMinus0 = logY.getkMinus0(); -// int jStart = Math.max(k - BAND_SIZE, 1); -// int jStop = Math.min(k + BAND_SIZE, numSamples); -// -// if ( k == 0 ) { // special case for k = 0 -// for ( int j=1; j <= numSamples; j++ ) { -// kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][GenotypeType.AA.ordinal()]; -// } -// } else { // k > 0 -// final double[] kMinus1 = logY.getkMinus1(); -// final double[] kMinus2 = logY.getkMinus2(); -// Arrays.fill(kMinus0,0); -// -// for ( int j = jStart; j <= jStop; j++ ) { -// final double[] gl = genotypeLikelihoods[j]; -// final double logDenominator = log10Cache[2*j] + log10Cache[2*j-1]; -// -// double aa = Double.NEGATIVE_INFINITY; -// double ab = Double.NEGATIVE_INFINITY; -// if (k < 2*j-1) -// aa = log10Cache[2*j-k] + log10Cache[2*j-k-1] + kMinus0[j-1] + gl[GenotypeType.AA.ordinal()]; -// -// if (k < 2*j) -// ab = log10Cache[2*k] + log10Cache[2*j-k]+ kMinus1[j-1] + gl[GenotypeType.AB.ordinal()]; -// -// double log10Max; -// if (k > 1) { -// final double bb = log10Cache[k] + log10Cache[k-1] + kMinus2[j-1] + gl[GenotypeType.BB.ordinal()]; -// log10Max = approximateLog10SumLog10(aa, ab, bb); -// } else { -// // we know we aren't considering the BB case, so we can use an optimized log10 function -// log10Max = approximateLog10SumLog10(aa, ab); -// } -// -// // finally, update the L(j,k) value -// kMinus0[j] = log10Max - logDenominator; -// -// String offset = Utils.dupString(' ',k); -// System.out.printf("%s%3d %3d %.2f%n", offset, k, j, kMinus0[j]); -// } -// } -// -// // update the posteriors vector -// final double log10LofK = kMinus0[jStop]; -// log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k]; -// -// // can we abort early? -// lastK = k; -// maxLog10L = Math.max(maxLog10L, log10LofK); -// if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { -// if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L); -// done = true; -// } -// -// logY.rotate(); -// } -// -// return lastK; - } - public int linearExact(Map GLs, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) { @@ -605,82 +449,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { return calls; } - // ------------------------------------------------------------------------------------- - // - // Gold standard, but O(N^2), implementation. - // - // TODO -- remove me for clarity in this code - // - // ------------------------------------------------------------------------------------- - public int gdaN2GoldStandard(Map GLs, - double[] log10AlleleFrequencyPriors, - double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) { - int numSamples = GLs.size(); - int numChr = 2*numSamples; - - double[][] logYMatrix = new double[1+numSamples][1+numChr]; - - for (int i=0; i <=numSamples; i++) - for (int j=0; j <=numChr; j++) - logYMatrix[i][j] = Double.NEGATIVE_INFINITY; - - //YMatrix[0][0] = 1.0; - logYMatrix[0][0] = 0.0; - int j=0; - - for ( Map.Entry sample : GLs.entrySet() ) { - j++; - - if ( !sample.getValue().hasLikelihoods() ) - continue; - - //double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods()); - double[] genotypeLikelihoods = sample.getValue().getLikelihoods().getAsVector(); - //double logDenominator = Math.log10(2.0*j*(2.0*j-1)); - double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1]; - - // special treatment for k=0: iteration reduces to: - //YMatrix[j][0] = YMatrix[j-1][0]*genotypeLikelihoods[GenotypeType.AA.ordinal()]; - logYMatrix[j][0] = logYMatrix[j-1][0] + genotypeLikelihoods[idxAA]; - - for (int k=1; k <= 2*j; k++ ) { - - //double num = (2.0*j-k)*(2.0*j-k-1)*YMatrix[j-1][k] * genotypeLikelihoods[GenotypeType.AA.ordinal()]; - double logNumerator[]; - logNumerator = new double[3]; - if (k < 2*j-1) - logNumerator[0] = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + logYMatrix[j-1][k] + - genotypeLikelihoods[idxAA]; - else - logNumerator[0] = Double.NEGATIVE_INFINITY; - - - if (k < 2*j) - logNumerator[1] = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ logYMatrix[j-1][k-1] + - genotypeLikelihoods[idxAB]; - else - logNumerator[1] = Double.NEGATIVE_INFINITY; - - if (k > 1) - logNumerator[2] = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + logYMatrix[j-1][k-2] + - genotypeLikelihoods[idxBB]; - else - logNumerator[2] = Double.NEGATIVE_INFINITY; - - double logNum = MathUtils.softMax(logNumerator); - - //YMatrix[j][k] = num/den; - logYMatrix[j][k] = logNum - logDenominator; - } - - } - - for (int k=0; k <= numChr; k++) - log10AlleleFrequencyPosteriors[k] = logYMatrix[j][k] + log10AlleleFrequencyPriors[k]; - - return numChr; - } - private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) { int j = logYMatrix.length - 1; System.out.printf("-----------------------------------%n"); @@ -689,5 +457,4 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior); } } - } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 7b8045581..3c8fd4451 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -168,10 +168,6 @@ public class UnifiedArgumentCollection { @Argument(fullName = "GSA_PRODUCTION_ONLY", shortName = "GSA_PRODUCTION_ONLY", doc = "don't ever use me", required = false) public boolean GSA_PRODUCTION_ONLY = false; - @Hidden - @Argument(fullName = "exactCalculation", shortName = "exactCalculation", doc = "expt", required = false) - public ExactAFCalculationModel.ExactCalculation EXACT_CALCULATION_TYPE = ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL; - @Hidden @Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false) public boolean IGNORE_SNP_ALLELES = false; @@ -191,7 +187,6 @@ public class UnifiedArgumentCollection { uac.GLmodel = GLmodel; uac.AFmodel = AFmodel; - uac.EXACT_CALCULATION_TYPE = EXACT_CALCULATION_TYPE; uac.heterozygosity = heterozygosity; uac.PCR_error = PCR_error; uac.GenotypingMode = GenotypingMode; From cc23b0b8a9b184eee6908a2fcbfdf63318be11b1 Mon Sep 17 00:00:00 2001 From: Matt Hanna Date: Fri, 23 Sep 2011 14:52:31 -0400 Subject: [PATCH 17/18] Fix for recent change modelling unmapped shards: don't invoke optimization to combine mapped and unmapped shards. --- .../sting/gatk/datasources/reads/LowMemoryIntervalSharder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java index ba6321121..bf5f33dc3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java @@ -59,7 +59,7 @@ public class LowMemoryIntervalSharder implements Iterator { */ public FilePointer next() { FilePointer current = wrappedIterator.next(); - while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0) + while(wrappedIterator.hasNext() && current.isRegionUnmapped == wrappedIterator.peek().isRegionUnmapped && current.minus(wrappedIterator.peek()) == 0) current = current.combine(parser,wrappedIterator.next()); return current; } From e1cb5f6459a9e1c534acb583dd94f9412f35322e Mon Sep 17 00:00:00 2001 From: David Roazen Date: Fri, 23 Sep 2011 15:00:03 -0400 Subject: [PATCH 18/18] SnpEff annotator now assigns a functional class to each effect and distinguishes between actual effects and mere modifiers. -We now assign a functional class (nonsense, missense, silent, or none) to each SnpEff effect, and add a SNPEFF_FUNCTIONAL_CLASS annotation to the INFO field of the output VCF. -Effects are now prioritized according to both biological impact and functional class, instead of impact only. -Many of SnpEff's "low-impact" effects are now classified as "modifiers" with lower priority than every other effect. This includes such "effects" as DOWNSTREAM, UPSTREAM, INTRON, GENE, EXON, and others that really describe the location of the variant rather than its biological effect. This code will be short-lived (likely 1.2-only), as the next version of SnpEff will include most of these features directly. Checking this change into Stable+Unstable instead of Unstable because the current functional class stratification in VariantEval is basically broken and urgently needs to be fixed for production purposes. --- .../sting/gatk/walkers/annotator/SnpEff.java | 160 ++++++++++++------ .../stratifications/FunctionalClass.java | 19 ++- .../VariantAnnotatorIntegrationTest.java | 4 +- .../VariantEvalIntegrationTest.java | 2 +- 4 files changed, 126 insertions(+), 59 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java index 8f99c6118..973b3277d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -82,7 +82,8 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio GENE_NAME_KEY ("SNPEFF_GENE_NAME", 3), GENE_BIOTYPE_KEY ("SNPEFF_GENE_BIOTYPE", 4), TRANSCRIPT_ID_KEY ("SNPEFF_TRANSCRIPT_ID", 6), - EXON_ID_KEY ("SNPEFF_EXON_ID", 7); + EXON_ID_KEY ("SNPEFF_EXON_ID", 7), + FUNCTIONAL_CLASS_KEY ("SNPEFF_FUNCTIONAL_CLASS", -1); // Actual text of the key private final String keyName; @@ -108,48 +109,73 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio // Possible SnpEff biological effects. All effect names found in the SnpEff input file // are validated against this list. public enum EffectType { - NONE, - CHROMOSOME, - INTERGENIC, - UPSTREAM, - UTR_5_PRIME, - UTR_5_DELETED, - START_GAINED, - SPLICE_SITE_ACCEPTOR, - SPLICE_SITE_DONOR, - START_LOST, - SYNONYMOUS_START, - NON_SYNONYMOUS_START, - CDS, - GENE, - TRANSCRIPT, - EXON, - EXON_DELETED, - NON_SYNONYMOUS_CODING, - SYNONYMOUS_CODING, - FRAME_SHIFT, - CODON_CHANGE, - CODON_INSERTION, - CODON_CHANGE_PLUS_CODON_INSERTION, - CODON_DELETION, - CODON_CHANGE_PLUS_CODON_DELETION, - STOP_GAINED, - SYNONYMOUS_STOP, - NON_SYNONYMOUS_STOP, - STOP_LOST, - INTRON, - UTR_3_PRIME, - UTR_3_DELETED, - DOWNSTREAM, - INTRON_CONSERVED, - INTERGENIC_CONSERVED, - REGULATION, - CUSTOM, - WITHIN_NON_CODING_GENE + // High-impact effects: + FRAME_SHIFT (EffectFunctionalClass.NONE, false), + STOP_GAINED (EffectFunctionalClass.NONSENSE, false), + START_LOST (EffectFunctionalClass.NONE, false), + SPLICE_SITE_ACCEPTOR (EffectFunctionalClass.NONE, false), + SPLICE_SITE_DONOR (EffectFunctionalClass.NONE, false), + EXON_DELETED (EffectFunctionalClass.NONE, false), + STOP_LOST (EffectFunctionalClass.NONE, false), + + // Moderate-impact effects: + NON_SYNONYMOUS_CODING (EffectFunctionalClass.MISSENSE, false), + CODON_CHANGE (EffectFunctionalClass.NONE, false), + CODON_INSERTION (EffectFunctionalClass.NONE, false), + CODON_CHANGE_PLUS_CODON_INSERTION (EffectFunctionalClass.NONE, false), + CODON_DELETION (EffectFunctionalClass.NONE, false), + CODON_CHANGE_PLUS_CODON_DELETION (EffectFunctionalClass.NONE, false), + UTR_5_DELETED (EffectFunctionalClass.NONE, false), + UTR_3_DELETED (EffectFunctionalClass.NONE, false), + + // Low-impact effects: + SYNONYMOUS_CODING (EffectFunctionalClass.SILENT, false), + SYNONYMOUS_START (EffectFunctionalClass.SILENT, false), + NON_SYNONYMOUS_START (EffectFunctionalClass.SILENT, false), + SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false), + NON_SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false), + START_GAINED (EffectFunctionalClass.NONE, false), + + // Modifiers: + NONE (EffectFunctionalClass.NONE, true), + CHROMOSOME (EffectFunctionalClass.NONE, true), + INTERGENIC (EffectFunctionalClass.NONE, true), + UPSTREAM (EffectFunctionalClass.NONE, true), + UTR_5_PRIME (EffectFunctionalClass.NONE, true), + CDS (EffectFunctionalClass.NONE, true), + GENE (EffectFunctionalClass.NONE, true), + TRANSCRIPT (EffectFunctionalClass.NONE, true), + EXON (EffectFunctionalClass.NONE, true), + INTRON (EffectFunctionalClass.NONE, true), + UTR_3_PRIME (EffectFunctionalClass.NONE, true), + DOWNSTREAM (EffectFunctionalClass.NONE, true), + INTRON_CONSERVED (EffectFunctionalClass.NONE, true), + INTERGENIC_CONSERVED (EffectFunctionalClass.NONE, true), + REGULATION (EffectFunctionalClass.NONE, true), + CUSTOM (EffectFunctionalClass.NONE, true), + WITHIN_NON_CODING_GENE (EffectFunctionalClass.NONE, true); + + private final EffectFunctionalClass functionalClass; + private final boolean isModifier; + + EffectType ( EffectFunctionalClass functionalClass, boolean isModifier ) { + this.functionalClass = functionalClass; + this.isModifier = isModifier; + } + + public EffectFunctionalClass getFunctionalClass() { + return functionalClass; + } + + public boolean isModifier() { + return isModifier; + } } - // SnpEff labels each effect as either LOW, MODERATE, or HIGH impact. + // SnpEff labels each effect as either LOW, MODERATE, or HIGH impact. We take the additional step of + // classifying some of the LOW impact effects as MODIFIERs. public enum EffectImpact { + MODIFIER (0), LOW (1), MODERATE (2), HIGH (3); @@ -163,6 +189,10 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio public boolean isHigherImpactThan ( EffectImpact other ) { return this.severityRating > other.severityRating; } + + public boolean isSameImpactAs ( EffectImpact other ) { + return this.severityRating == other.severityRating; + } } // SnpEff labels most effects as either CODING or NON_CODING, but sometimes omits this information. @@ -172,6 +202,24 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio UNKNOWN } + // We assign a functional class to each SnpEff effect. + public enum EffectFunctionalClass { + NONE (0), + SILENT (1), + MISSENSE (2), + NONSENSE (3); + + private final int priority; + + EffectFunctionalClass ( int priority ) { + this.priority = priority; + } + + public boolean isHigherPriorityThan ( EffectFunctionalClass other ) { + return this.priority > other.priority; + } + } + public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set headerLines ) { // Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff // without providing a SnpEff rod via --snpEffFile): @@ -336,7 +384,8 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio InfoFieldKey.GENE_NAME_KEY.getKeyName(), InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), - InfoFieldKey.EXON_ID_KEY.getKeyName() + InfoFieldKey.EXON_ID_KEY.getKeyName(), + InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName() ); } @@ -349,7 +398,8 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio new VCFInfoHeaderLine(InfoFieldKey.GENE_NAME_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"), new VCFInfoHeaderLine(InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene biotype for the highest-impact effect resulting from the current variant"), new VCFInfoHeaderLine(InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"), - new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant") + new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant"), + new VCFInfoHeaderLine(InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Functional class of the highest-impact effect resulting from the current variant: " + Arrays.toString(EffectFunctionalClass.values())) ); } @@ -411,11 +461,16 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio return; } - try { - impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]); + if ( effect != null && effect.isModifier() ) { + impact = EffectImpact.MODIFIER; } - catch ( IllegalArgumentException e ) { - parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()])); + else { + try { + impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]); + } + catch ( IllegalArgumentException e ) { + parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()])); + } } codonChange = effectMetadata[InfoFieldKey.CODON_CHANGE_KEY.getFieldIndex()]; @@ -472,9 +527,17 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio } // Otherwise, both effects are either in or not in a coding gene, so we compare the impacts - // of the effects themselves: + // of the effects themselves. Effects with the same impact are tie-broken using the + // functional class of the effect: - return impact.isHigherImpactThan(other.impact); + if ( impact.isHigherImpactThan(other.impact) ) { + return true; + } + else if ( impact.isSameImpactAs(other.impact) ) { + return effect.getFunctionalClass().isHigherPriorityThan(other.effect.getFunctionalClass()); + } + + return false; } public Map getAnnotations() { @@ -488,6 +551,7 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio addAnnotation(annotations, InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), geneBiotype); addAnnotation(annotations, InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), transcriptID); addAnnotation(annotations, InfoFieldKey.EXON_ID_KEY.getKeyName(), exonID); + addAnnotation(annotations, InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), effect.getFunctionalClass().toString()); return annotations; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java index 88ffcaaeb..1dc047b5d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java @@ -62,14 +62,17 @@ public class FunctionalClass extends VariantStratifier { annotationId++; } while (eval.hasAttribute(key)); - } else if ( eval.hasAttribute(SnpEff.InfoFieldKey.EFFECT_KEY.getKeyName() ) ) { - SnpEff.EffectType snpEffType = SnpEff.EffectType.valueOf(eval.getAttribute(SnpEff.InfoFieldKey.EFFECT_KEY.getKeyName()).toString()); - if ( snpEffType == SnpEff.EffectType.STOP_GAINED ) - type = FunctionalType.nonsense; - else if ( snpEffType == SnpEff.EffectType.NON_SYNONYMOUS_CODING ) - type = FunctionalType.missense; - else if ( snpEffType == SnpEff.EffectType.SYNONYMOUS_CODING ) - type = FunctionalType.silent; + } else if ( eval.hasAttribute(SnpEff.InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()) ) { + try { + SnpEff.EffectFunctionalClass snpEffFunctionalClass = SnpEff.EffectFunctionalClass.valueOf(eval.getAttribute(SnpEff.InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()).toString()); + if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.NONSENSE ) + type = FunctionalType.nonsense; + else if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.MISSENSE ) + type = FunctionalType.missense; + else if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.SILENT ) + type = FunctionalType.silent; + } + catch ( Exception e ) {} // don't error out if the type isn't supported } if ( type != null ) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 2c06c6b7f..04bff8d41 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -132,9 +132,9 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " + validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation + - "snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000", + "snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000 -L 2:232,325,429", 1, - Arrays.asList("ed9d1b37b0bd8b65ff9ce2688e0e102e") + Arrays.asList("122321a85e448f21679f6ca15c5e22ad") ); executeTest("Testing SnpEff annotations", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index b90e6d0ff..9fe253ecb 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -30,7 +30,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("f5f811ceb973d7fd6c1b2b734f1b2b12") + Arrays.asList("d9dcb352c53106f54fcc981f15d35a90") ); executeTest("testFunctionClassWithSnpeff", spec); }