From 4e9020c9f7bd6fc429090213ad06c9a6b46ecc70 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 22 Sep 2011 13:28:25 -0400 Subject: [PATCH 01/20] Fixed alignment start for hard clipping insertions --- .../sting/utils/clipreads/ClippingOp.java | 38 ++++++++++++------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index 951ab265c..0c2def1c6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -432,25 +432,37 @@ public class ClippingOp { } private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) { - int shift = 0; + int newShift = 0; + int oldShift = 0; + int deletionShift = 0; - // Rewind to previous start (by counting everything that was already clipped in this read) - for (CigarElement cigarElement : oldCigar.getCigarElements()) { - if (!cigarElement.getOperator().consumesReferenceBases()) - shift -= cigarElement.getLength(); - else - break; - } - - // Advance to new start (by counting everything new that has been clipped ) for (CigarElement cigarElement : newCigar.getCigarElements()) { - if (!cigarElement.getOperator().consumesReferenceBases()) - shift += cigarElement.getLength(); + if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) + newShift += cigarElement.getLength(); else break; } - return shift; + for (CigarElement cigarElement : oldCigar.getCigarElements()) { + if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP ) + oldShift += Math.min(cigarElement.getLength(), newShift - oldShift); + else + break; + } + + int basesClipped = 0; + for (CigarElement cigarElement : oldCigar.getCigarElements()) { + if (basesClipped > newShift) // are we beyond the clipped region? + break; + + else if (cigarElement.getOperator() == CigarOperator.DELETION) // if this is a deletion, we have to adjust the starting shift + deletionShift += cigarElement.getLength(); + + else if (cigarElement.getOperator() != CigarOperator.INSERTION) // if it's not an insertion or deletion, than it counts as hard clipped base. + basesClipped += cigarElement.getLength(); + } + + return newShift - oldShift + deletionShift; } private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) { From 1acf7945c544a923c5024b06a8322d351c889584 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 22 Sep 2011 14:51:14 -0400 Subject: [PATCH 02/20] Fixed hard clipped cigar and alignment start * Hard clipped Cigar now includes all insertions that were hard clipped and not the deletions. * The alignment start is now recalculated according to the new hard clipped cigar representation --- .../sting/utils/clipreads/ClippingOp.java | 6 +++--- .../sting/utils/clipreads/ReadClipper.java | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index 0c2def1c6..47ce8165c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -458,7 +458,7 @@ public class ClippingOp { else if (cigarElement.getOperator() == CigarOperator.DELETION) // if this is a deletion, we have to adjust the starting shift deletionShift += cigarElement.getLength(); - else if (cigarElement.getOperator() != CigarOperator.INSERTION) // if it's not an insertion or deletion, than it counts as hard clipped base. + else basesClipped += cigarElement.getLength(); } @@ -474,8 +474,8 @@ public class ClippingOp { return -clippedLength; } - if (cigarElement.getOperator() == CigarOperator.DELETION) - return cigarElement.getLength(); +// if (cigarElement.getOperator() == CigarOperator.DELETION) +// return cigarElement.getLength(); return 0; } diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index 9b9ce4fdf..e2ecbe46b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -71,21 +71,21 @@ public class ReadClipper { private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart); - int stop = (refStop < 0) ? read.getReadLength() - 1: ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop); + int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop); - if (start < 0 || stop > read.getReadLength() - 1 + numDeletions(read)) + if (start < 0 || stop > read.getReadLength() - 1) throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); - // TODO add check in the Hardclip function - if ( start > stop ) - stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read)); - + if ( start > stop ) { +// stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read)); + throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!"); + } //This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into //an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read - stop -= numDeletions(read); - if ( start > stop ) - start -= numDeletions(read); +// stop -= numDeletions(read); +// if ( start > stop ) +// start -= numDeletions(read); //System.out.println("Clipping start/stop: " + start + "/" + stop); From 68da555932761567f79f0b3bfdbc808dcf72ca5e Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Sep 2011 15:16:37 -0400 Subject: [PATCH 03/20] UnitTest for simpleMerge for alleles --- .../VariantContextUtilsUnitTest.java | 176 ++++++++++++------ 1 file changed, 118 insertions(+), 58 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 81007f9ff..01d398c1a 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -48,10 +48,8 @@ import java.util.*; public class VariantContextUtilsUnitTest extends BaseTest { - Allele Aref, T, delRef, ATC; - Genotype ref1, snp1, snp2, indel1, indelref; + Allele Aref, T, C, delRef, ATC, ATCATC; private GenomeLocParser genomeLocParser; - VariantContext refVC, snpVC1, snpVC2, snpVC3, snpVC4, indelVC1, indelVC2, indelVC3; @BeforeSuite public void setup() { @@ -68,22 +66,9 @@ public class VariantContextUtilsUnitTest extends BaseTest { Aref = Allele.create("A", true); delRef = Allele.create("-", true); T = Allele.create("T"); + C = Allele.create("C"); ATC = Allele.create("ATC"); - - ref1 = new Genotype("ref1", Arrays.asList(Aref, Aref), 5, new double[]{0, 5, 10}); - snp1 = new Genotype("snp1", Arrays.asList(Aref,T), 10, new double[]{10, 0, 20}); - snp2 = new Genotype("snp2", Arrays.asList(T,T), 15, new double[]{25, 15, 0}); - indelref = new Genotype("indelref", Arrays.asList(delRef,delRef), 25, new double[]{0, 25, 30}); - indel1 = new Genotype("indel1", Arrays.asList(delRef,ATC), 20, new double[]{20, 0, 30}); - - refVC = makeVC("refvc", Arrays.asList(Aref), Arrays.asList(ref1)); - snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T), Arrays.asList(snp1)); - snpVC2 = makeVC("snpvc2", Arrays.asList(Aref, T), Arrays.asList(snp1, snp2)); - snpVC3 = makeVC("snpvc3", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1)); - snpVC4 = makeVC("snpvc4", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1, snp2)); - indelVC1 = makeVC("indelvc1", Arrays.asList(delRef), Arrays.asList(indelref)); - indelVC2 = makeVC("indelvc2", Arrays.asList(delRef, ATC), Arrays.asList(indel1)); - indelVC3 = makeVC("indelvc3", Arrays.asList(delRef, ATC), Arrays.asList(indelref, indel1)); + ATCATC = Allele.create("ATCATC"); } private VariantContext makeVC(String source, List alleles) { @@ -98,45 +83,124 @@ public class VariantContextUtilsUnitTest extends BaseTest { int start = 10; int stop = start; // alleles.contains(ATC) ? start + 3 : start; return new VariantContext(source, "1", start, stop, alleles, - VariantContext.genotypeCollectionToMap(new TreeMap(), genotypes), + genotypes == null ? null : VariantContext.genotypeCollectionToMap(new TreeMap(), genotypes), 1.0, filters, null, (byte)'C'); } - private class SimpleMergeTest extends TestDataProvider { - List inputVCs; - VariantContext expectedVC; + // -------------------------------------------------------------------------------- + // + // Test allele merging + // + // -------------------------------------------------------------------------------- - private SimpleMergeTest(VariantContext... vcsArg) { - super(SimpleMergeTest.class); - LinkedList allVCs = new LinkedList(Arrays.asList(vcsArg)); - expectedVC = allVCs.pollLast(); - inputVCs = allVCs; + private class MergeAllelesTest extends TestDataProvider { + List> inputs; + List expected; + + private MergeAllelesTest(List... arg) { + super(MergeAllelesTest.class); + LinkedList> all = new LinkedList>(Arrays.asList(arg)); + expected = all.pollLast(); + inputs = all; } public String toString() { - return String.format("SimpleMergeTest vc=%s expected=%s", inputVCs, expectedVC); + return String.format("MergeAllelesTest input=%s expected=%s", inputs, expected); } } - - @DataProvider(name = "simplemergedata") - public Object[][] createSimpleMergeData() { + @DataProvider(name = "mergeAlleles") + public Object[][] mergeAllelesData() { // first, do no harm - new SimpleMergeTest(refVC, refVC); - new SimpleMergeTest(snpVC1, snpVC1); - new SimpleMergeTest(indelVC1, indelVC1); - new SimpleMergeTest(indelVC3, indelVC3); + new MergeAllelesTest(Arrays.asList(Aref), + Arrays.asList(Aref)); - new SimpleMergeTest(refVC, snpVC1, snpVC3); - new SimpleMergeTest(snpVC1, snpVC2, snpVC2); - new SimpleMergeTest(refVC, snpVC2, snpVC4); + new MergeAllelesTest(Arrays.asList(Aref), + Arrays.asList(Aref), + Arrays.asList(Aref)); - new SimpleMergeTest(indelVC1, indelVC2, indelVC3); - new SimpleMergeTest(indelVC1, indelVC3, indelVC3); - new SimpleMergeTest(indelVC2, indelVC3, indelVC3); + new MergeAllelesTest(Arrays.asList(Aref), + Arrays.asList(Aref, T), + Arrays.asList(Aref, T)); - return SimpleMergeTest.getTests(SimpleMergeTest.class); + new MergeAllelesTest(Arrays.asList(Aref, C), + Arrays.asList(Aref, T), + Arrays.asList(Aref, C, T)); + + new MergeAllelesTest(Arrays.asList(Aref, T), + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); // sorted by allele + + new MergeAllelesTest(Arrays.asList(Aref, C, T), + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); + + new MergeAllelesTest(Arrays.asList(Aref, T, C), + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); // sorted by allele + + new MergeAllelesTest(Arrays.asList(delRef), + Arrays.asList(delRef)); // todo -- FIXME me GdA + + new MergeAllelesTest(Arrays.asList(delRef), + Arrays.asList(delRef, ATC), + Arrays.asList(delRef, ATC)); + + new MergeAllelesTest(Arrays.asList(delRef), + Arrays.asList(delRef, ATC, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); + + new MergeAllelesTest(Arrays.asList(delRef, ATCATC), + Arrays.asList(delRef, ATC, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); + + new MergeAllelesTest(Arrays.asList(delRef, ATC), + Arrays.asList(delRef, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); + + return MergeAllelesTest.getTests(MergeAllelesTest.class); } + @Test(dataProvider = "mergeAlleles") + public void testMergeAlleles(MergeAllelesTest cfg) { + final List priority = new ArrayList(); + final List inputs = new ArrayList(); + + int i = 0; + for ( final List alleles : cfg.inputs ) { + final String name = "vcf" + ++i; + priority.add(name); + inputs.add(makeVC(name, alleles)); + } + + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + inputs, priority, + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false); + Assert.assertEquals(merged.getAlleles(), cfg.expected); + } + +// VariantContext refVC, snpVC1, snpVC2, snpVC3, snpVC4, indelVC1, indelVC2, indelVC3; +// ref1 = new Genotype("ref1", Arrays.asList(Aref, Aref), 5, new double[]{0, 5, 10}); +// snp1 = new Genotype("snp1", Arrays.asList(Aref,T), 10, new double[]{10, 0, 20}); +// snp2 = new Genotype("snp2", Arrays.asList(T,T), 15, new double[]{25, 15, 0}); +// indelref = new Genotype("indelref", Arrays.asList(delRef,delRef), 25, new double[]{0, 25, 30}); +// indel1 = new Genotype("indel1", Arrays.asList(delRef,ATC), 20, new double[]{20, 0, 30}); +// +// refVC = makeVC("refvc", Arrays.asList(Aref), Arrays.asList(ref1)); +// snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T), Arrays.asList(snp1)); +// snpVC2 = makeVC("snpvc2", Arrays.asList(Aref, T), Arrays.asList(snp1, snp2)); +// snpVC3 = makeVC("snpvc3", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1)); +// snpVC4 = makeVC("snpvc4", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1, snp2)); +// indelVC1 = makeVC("indelvc1", Arrays.asList(delRef), Arrays.asList(indelref)); +// indelVC2 = makeVC("indelvc2", Arrays.asList(delRef, ATC), Arrays.asList(indel1)); +// indelVC3 = makeVC("indelvc3", Arrays.asList(delRef, ATC), Arrays.asList(indelref, indel1)); + + // -------------------------------------------------------------------------------- + // + // Test rsID merging + // + // -------------------------------------------------------------------------------- + private class SimpleMergeRSIDTest extends TestDataProvider { List inputs; String expected; @@ -156,10 +220,13 @@ public class VariantContextUtilsUnitTest extends BaseTest { @DataProvider(name = "simplemergersiddata") public Object[][] createSimpleMergeRSIDData() { new SimpleMergeRSIDTest(".", "."); + new SimpleMergeRSIDTest(".", ".", "."); new SimpleMergeRSIDTest("rs1", "rs1"); + new SimpleMergeRSIDTest("rs1", "rs1", "rs1"); new SimpleMergeRSIDTest(".", "rs1", "rs1"); new SimpleMergeRSIDTest("rs1", ".", "rs1"); new SimpleMergeRSIDTest("rs1", "rs2", "rs1,rs2"); + new SimpleMergeRSIDTest("rs1", "rs2", "rs1", "rs1,rs2"); // duplicates new SimpleMergeRSIDTest("rs2", "rs1", "rs2,rs1"); new SimpleMergeRSIDTest("rs2", "rs1", ".", "rs2,rs1"); new SimpleMergeRSIDTest("rs2", ".", "rs1", "rs2,rs1"); @@ -171,32 +238,25 @@ public class VariantContextUtilsUnitTest extends BaseTest { @Test(dataProvider = "simplemergersiddata") public void testRSIDMerge(SimpleMergeRSIDTest cfg) { - List inputs = new ArrayList(); - for ( String id : cfg.inputs ) { + final VariantContext snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T)); + final List inputs = new ArrayList(); + + for ( final String id : cfg.inputs ) { MutableVariantContext vc = new MutableVariantContext(snpVC1); if ( ! id.equals(".") ) vc.setID(id); inputs.add(vc); - } - VariantContext merged = myMerge(inputs); + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + inputs, null, + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, "set", false, false); Assert.assertEquals(merged.getID(), cfg.expected.equals(".") ? null : cfg.expected); } - private VariantContext myMerge(List inputs) { - List priority = new ArrayList(); - for ( VariantContext vc : inputs ) priority.add(vc.getSource()); - - return VariantContextUtils.simpleMerge(genomeLocParser, - inputs, priority, - VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, - VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); - } - // todo -- add tests for subset merging, especially with correct PLs - // todo -- test priority list + // todo -- test priority list: intersection, filtered in all, reference in all, X-filteredInX, X // todo -- test FilteredRecordMergeType // todo -- no annotate origin // todo -- test set key - // todo -- test filtered are uncalled } From 39b54211d079dc2c70a2a9b4a38b9e08880b24df Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 22 Sep 2011 15:46:55 -0400 Subject: [PATCH 04/20] Fixed hard clipping soft clipped bases after hard clips if soft clipped bases were after a hard clipped section of the read, the hard clip was clipping the left soft clip tail as if it were a right tail. Mayhem. --- .../org/broadinstitute/sting/utils/clipreads/ReadClipper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index e2ecbe46b..d0e7f8371 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -145,7 +145,7 @@ public class ReadClipper { cutLeft = readIndex + cigarElement.getLength() - 1; } } - else + else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP) rightTail = true; if (cigarElement.getOperator().consumesReadBases()) From 46ca33dc047d6a52406142d9dc323185938d7dab Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Sep 2011 17:04:32 -0400 Subject: [PATCH 06/20] TestDataProvider now can be named --- .../test/org/broadinstitute/sting/BaseTest.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index 63faf1ab9..35a81770d 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -132,15 +132,21 @@ public abstract class BaseTest { */ public static class TestDataProvider { private static final Map> tests = new HashMap>(); + private final String name; /** * Create a new TestDataProvider instance bound to the class variable C * @param c */ - public TestDataProvider(Class c) { + public TestDataProvider(Class c, String name) { if ( ! tests.containsKey(c) ) tests.put(c, new ArrayList()); tests.get(c).add(this); + this.name = name; + } + + public TestDataProvider(Class c) { + this(c, ""); } /** @@ -153,6 +159,11 @@ public abstract class BaseTest { for ( Object x : tests.get(c) ) params2.add(new Object[]{x}); return params2.toArray(new Object[][]{}); } + + @Override + public String toString() { + return "TestDataProvider("+name+")"; + } } /** From 5cf82f923615b2dae0dee1b80f043fadacdfbc05 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Sep 2011 17:05:12 -0400 Subject: [PATCH 07/20] simpleMerge UnitTest tests filtered VC merging --- .../variantcontext/VariantContextUtils.java | 14 +- .../VariantContextUtilsUnitTest.java | 141 ++++++++++++++++-- 2 files changed, 135 insertions(+), 20 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index ca9c71eba..03fe969b5 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -44,6 +44,11 @@ import java.io.Serializable; import java.util.*; public class VariantContextUtils { + public final static String MERGE_INTERSECTION = "Intersection"; + public final static String MERGE_FILTER_IN_ALL = "FilteredInAll"; + public final static String MERGE_REF_IN_ALL = "ReferenceInAll"; + public final static String MERGE_FILTER_PREFIX = "filterIn"; + final public static JexlEngine engine = new JexlEngine(); static { engine.setSilent(false); // will throw errors now for selects that don't evaluate properly @@ -609,19 +614,20 @@ public class VariantContextUtils { if ( filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() ) filters.clear(); + if ( annotateOrigin ) { // we care about where the call came from String setValue; if ( nFiltered == 0 && variantSources.size() == priorityListOfVCs.size() ) // nothing was unfiltered - setValue = "Intersection"; + setValue = MERGE_INTERSECTION; else if ( nFiltered == VCs.size() ) // everything was filtered out - setValue = "FilteredInAll"; + setValue = MERGE_FILTER_IN_ALL; else if ( variantSources.isEmpty() ) // everyone was reference - setValue = "ReferenceInAll"; + setValue = MERGE_REF_IN_ALL; else { LinkedHashSet s = new LinkedHashSet(); for ( VariantContext vc : VCs ) if ( vc.isVariant() ) - s.add( vc.isFiltered() ? "filterIn" + vc.getSource() : vc.getSource() ); + s.add( vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource() ); setValue = Utils.join("-", s); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 01d398c1a..ff26d7245 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -41,6 +41,7 @@ import org.testng.Assert; import org.testng.annotations.BeforeSuite; import org.testng.annotations.Test; import org.testng.annotations.DataProvider; +import org.yaml.snakeyaml.Yaml; import java.io.File; import java.io.FileNotFoundException; @@ -75,6 +76,14 @@ public class VariantContextUtilsUnitTest extends BaseTest { return makeVC(source, alleles, null, null); } + private VariantContext makeVC(String source, List alleles, String filter) { + return makeVC(source, alleles, filter.equals(".") ? null : new HashSet(Arrays.asList(filter))); + } + + private VariantContext makeVC(String source, List alleles, Set filters) { + return makeVC(source, alleles, null, filters); + } + private VariantContext makeVC(String source, List alleles, Collection genotypes) { return makeVC(source, alleles, genotypes, null); } @@ -179,22 +188,6 @@ public class VariantContextUtilsUnitTest extends BaseTest { Assert.assertEquals(merged.getAlleles(), cfg.expected); } -// VariantContext refVC, snpVC1, snpVC2, snpVC3, snpVC4, indelVC1, indelVC2, indelVC3; -// ref1 = new Genotype("ref1", Arrays.asList(Aref, Aref), 5, new double[]{0, 5, 10}); -// snp1 = new Genotype("snp1", Arrays.asList(Aref,T), 10, new double[]{10, 0, 20}); -// snp2 = new Genotype("snp2", Arrays.asList(T,T), 15, new double[]{25, 15, 0}); -// indelref = new Genotype("indelref", Arrays.asList(delRef,delRef), 25, new double[]{0, 25, 30}); -// indel1 = new Genotype("indel1", Arrays.asList(delRef,ATC), 20, new double[]{20, 0, 30}); -// -// refVC = makeVC("refvc", Arrays.asList(Aref), Arrays.asList(ref1)); -// snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T), Arrays.asList(snp1)); -// snpVC2 = makeVC("snpvc2", Arrays.asList(Aref, T), Arrays.asList(snp1, snp2)); -// snpVC3 = makeVC("snpvc3", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1)); -// snpVC4 = makeVC("snpvc4", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1, snp2)); -// indelVC1 = makeVC("indelvc1", Arrays.asList(delRef), Arrays.asList(indelref)); -// indelVC2 = makeVC("indelvc2", Arrays.asList(delRef, ATC), Arrays.asList(indel1)); -// indelVC3 = makeVC("indelvc3", Arrays.asList(delRef, ATC), Arrays.asList(indelref, indel1)); - // -------------------------------------------------------------------------------- // // Test rsID merging @@ -254,6 +247,122 @@ public class VariantContextUtilsUnitTest extends BaseTest { Assert.assertEquals(merged.getID(), cfg.expected.equals(".") ? null : cfg.expected); } + // -------------------------------------------------------------------------------- + // + // Test filtered merging + // + // -------------------------------------------------------------------------------- + + private class MergeFilteredTest extends TestDataProvider { + List inputs; + VariantContext expected; + String setExpected; + VariantContextUtils.FilteredRecordMergeType type; + + + private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, String setExpected) { + this(name, input1, input2, expected, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, setExpected); + } + + private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, VariantContextUtils.FilteredRecordMergeType type, String setExpected) { + super(MergeFilteredTest.class, name); + LinkedList all = new LinkedList(Arrays.asList(input1, input2)); + this.expected = expected; + this.type = type; + inputs = all; + this.setExpected = setExpected; + } + + public String toString() { + return String.format("%s input=%s expected=%s", super.toString(), inputs, expected); + } + } + + @DataProvider(name = "mergeFiltered") + public Object[][] mergeFilteredData() { + new MergeFilteredTest("AllPass", + makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + VariantContextUtils.MERGE_INTERSECTION); + + new MergeFilteredTest("noFilters", + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "."), + VariantContextUtils.MERGE_INTERSECTION); + + new MergeFilteredTest("oneFiltered", + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), "."), + String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + + new MergeFilteredTest("onePassOneFail", + makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + + new MergeFilteredTest("FailOneUnfiltered", + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "."), + String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); + + new MergeFilteredTest("AllFiltered", + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), "FAIL"), + VariantContextUtils.MERGE_FILTER_IN_ALL); + + // test ALL vs. ANY + new MergeFilteredTest("OneFailAllUnfilteredArg", + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "FAIL"), + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED, + String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); + + // test excluding allele in filtered record + new MergeFilteredTest("DontIncludeAlleleOfFilteredRecords", + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), "."), + String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + + // promotion of site from unfiltered to PASSES + new MergeFilteredTest("UnfilteredPlusPassIsPass", + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + VariantContextUtils.MERGE_INTERSECTION); + + return MergeFilteredTest.getTests(MergeFilteredTest.class); + } + + @Test(dataProvider = "mergeFiltered") + public void testMergeFiltered(MergeFilteredTest cfg) { + final List priority = new ArrayList(); + + for ( final VariantContext vc : cfg.inputs ) { + priority.add(vc.getSource()); + } + + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + cfg.inputs, priority, cfg.type, VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); + + // test alleles are equal + Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles()); + + // test set field + Assert.assertEquals(merged.getAttribute("set"), cfg.setExpected); + + // test filter field + Assert.assertEquals(merged.getFilters(), cfg.expected.getFilters()); + } + + // todo -- add tests for subset merging, especially with correct PLs // todo -- test priority list: intersection, filtered in all, reference in all, X-filteredInX, X // todo -- test FilteredRecordMergeType From 30ab3af0c88d4d22a6d894c8d5e45863b4957445 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Sep 2011 17:14:59 -0400 Subject: [PATCH 08/20] A few more simpleMerge UnitTest tests for filtered vcs --- .../VariantContextUtilsUnitTest.java | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index ff26d7245..070fdaca4 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -304,12 +304,6 @@ public class VariantContextUtilsUnitTest extends BaseTest { makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); - new MergeFilteredTest("FailOneUnfiltered", - makeVC("1", Arrays.asList(Aref, T), "FAIL"), - makeVC("2", Arrays.asList(Aref, T), "."), - makeVC("3", Arrays.asList(Aref, T), "."), - String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); - new MergeFilteredTest("AllFiltered", makeVC("1", Arrays.asList(Aref, T), "FAIL"), makeVC("2", Arrays.asList(Aref, T), "FAIL"), @@ -317,6 +311,13 @@ public class VariantContextUtilsUnitTest extends BaseTest { VariantContextUtils.MERGE_FILTER_IN_ALL); // test ALL vs. ANY + new MergeFilteredTest("FailOneUnfiltered", + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "."), + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); + new MergeFilteredTest("OneFailAllUnfilteredArg", makeVC("1", Arrays.asList(Aref, T), "FAIL"), makeVC("2", Arrays.asList(Aref, T), "."), @@ -338,6 +339,18 @@ public class VariantContextUtilsUnitTest extends BaseTest { makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), VariantContextUtils.MERGE_INTERSECTION); + new MergeFilteredTest("RefInAll", + makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + VariantContextUtils.MERGE_REF_IN_ALL); + + new MergeFilteredTest("RefInOne", + makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + "2"); + return MergeFilteredTest.getTests(MergeFilteredTest.class); } From dab7232e9af623a3d085988bac751ee4df229748 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 22 Sep 2011 17:26:11 -0400 Subject: [PATCH 09/20] simpleMerge UnitTest for not annotating and annotating to different info key --- .../VariantContextUtilsUnitTest.java | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 070fdaca4..77e4391a3 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -375,10 +375,26 @@ public class VariantContextUtilsUnitTest extends BaseTest { Assert.assertEquals(merged.getFilters(), cfg.expected.getFilters()); } + @Test + public void testAnnotationSet() { + for ( final boolean annotate : Arrays.asList(true, false)) { + for ( final String set : Arrays.asList("set", "combine", "x")) { + final List priority = Arrays.asList("1", "2"); + VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS); + VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS); + + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + Arrays.asList(vc1, vc2), priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.PRIORITIZE, annotate, false, set, false, false); + + if ( annotate ) + Assert.assertEquals(merged.getAttribute(set), VariantContextUtils.MERGE_INTERSECTION); + else + Assert.assertFalse(merged.hasAttribute(set)); + } + } + } // todo -- add tests for subset merging, especially with correct PLs - // todo -- test priority list: intersection, filtered in all, reference in all, X-filteredInX, X - // todo -- test FilteredRecordMergeType - // todo -- no annotate origin - // todo -- test set key + // todo -- test priority list } From a8e0fb26ea309a6620d540310f1224fb08ec7440 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 23 Sep 2011 07:33:20 -0400 Subject: [PATCH 10/20] Updating md5 because the file changed --- .../sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java index b4a8498e1..1b2a6e82e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java @@ -33,7 +33,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(b36KGReference, "symbolic_alleles_2.vcf"), 1, - Arrays.asList("6645babc8c7d46be0da223477c7b1291")); + Arrays.asList("3008d6f5044bc14801e5c58d985dec72")); executeTest("Test symbolic alleles mixed in with non-symbolic alleles", spec); } } From 4397ce8653eab67546e261faef5ebd5619057b79 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 08:23:46 -0400 Subject: [PATCH 12/20] Moved removePLs to VariantContextUtils --- .../sting/utils/variantcontext/Genotype.java | 7 ------- .../sting/utils/variantcontext/VariantContextUtils.java | 9 ++++++++- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index 2e233c18e..fd8c70abe 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -74,13 +74,6 @@ public class Genotype { return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased()); } - public static Genotype removePLs(Genotype g) { - Map attrs = new HashMap(g.getAttributes()); - attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY); - attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY); - return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased()); - } - public static Genotype modifyAlleles(Genotype g, List alleles) { return new Genotype(g.getSampleName(), alleles, g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased()); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 03fe969b5..93d4f6f5c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -159,6 +159,13 @@ public class VariantContextUtils { return "%." + precision + "f"; } + public static Genotype removePLs(Genotype g) { + Map attrs = new HashMap(g.getAttributes()); + attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY); + attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY); + return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased()); + } + /** * A simple but common wrapper for matching VariantContext objects using JEXL expressions */ @@ -740,7 +747,7 @@ public class VariantContextUtils { Map newGs = new HashMap(genotypes.size()); for ( Map.Entry g : genotypes.entrySet() ) { - newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? Genotype.removePLs(g.getValue()) : g.getValue()); + newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? removePLs(g.getValue()) : g.getValue()); } return newGs; From a9f073fa68f500fbad4e068b75e3ff924a0d7d66 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 08:24:49 -0400 Subject: [PATCH 13/20] Genotype merging unit tests for simpleMerge -- Remaining TODOs are all for GdA --- .../VariantContextUtilsUnitTest.java | 319 +++++++++++++----- 1 file changed, 234 insertions(+), 85 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 77e4391a3..c1025a7de 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -21,14 +21,8 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ - -// our package package org.broadinstitute.sting.utils.variantcontext; - -// the imports for unit testing. - - import net.sf.picard.reference.IndexedFastaSequenceFile; import org.apache.log4j.Priority; import org.broadinstitute.sting.BaseTest; @@ -47,7 +41,6 @@ import java.io.File; import java.io.FileNotFoundException; import java.util.*; - public class VariantContextUtilsUnitTest extends BaseTest { Allele Aref, T, C, delRef, ATC, ATCATC; private GenomeLocParser genomeLocParser; @@ -72,10 +65,22 @@ public class VariantContextUtilsUnitTest extends BaseTest { ATCATC = Allele.create("ATCATC"); } + private Genotype makeG(String sample, Allele a1, Allele a2) { + return new Genotype(sample, Arrays.asList(a1, a2)); + } + + private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError) { + return new Genotype(sample, Arrays.asList(a1, a2), log10pError); + } + private VariantContext makeVC(String source, List alleles) { return makeVC(source, alleles, null, null); } + private VariantContext makeVC(String source, List alleles, Genotype... g1) { + return makeVC(source, alleles, Arrays.asList(g1)); + } + private VariantContext makeVC(String source, List alleles, String filter) { return makeVC(source, alleles, filter.equals(".") ? null : new HashSet(Arrays.asList(filter))); } @@ -121,66 +126,66 @@ public class VariantContextUtilsUnitTest extends BaseTest { public Object[][] mergeAllelesData() { // first, do no harm new MergeAllelesTest(Arrays.asList(Aref), - Arrays.asList(Aref)); + Arrays.asList(Aref)); new MergeAllelesTest(Arrays.asList(Aref), - Arrays.asList(Aref), - Arrays.asList(Aref)); + Arrays.asList(Aref), + Arrays.asList(Aref)); new MergeAllelesTest(Arrays.asList(Aref), - Arrays.asList(Aref, T), - Arrays.asList(Aref, T)); + Arrays.asList(Aref, T), + Arrays.asList(Aref, T)); new MergeAllelesTest(Arrays.asList(Aref, C), - Arrays.asList(Aref, T), - Arrays.asList(Aref, C, T)); + Arrays.asList(Aref, T), + Arrays.asList(Aref, C, T)); new MergeAllelesTest(Arrays.asList(Aref, T), - Arrays.asList(Aref, C), - Arrays.asList(Aref, C, T)); // sorted by allele + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); // sorted by allele new MergeAllelesTest(Arrays.asList(Aref, C, T), - Arrays.asList(Aref, C), - Arrays.asList(Aref, C, T)); + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); new MergeAllelesTest(Arrays.asList(Aref, T, C), - Arrays.asList(Aref, C), - Arrays.asList(Aref, C, T)); // sorted by allele + Arrays.asList(Aref, C), + Arrays.asList(Aref, C, T)); // sorted by allele new MergeAllelesTest(Arrays.asList(delRef), - Arrays.asList(delRef)); // todo -- FIXME me GdA + Arrays.asList(delRef)); // todo -- FIXME me GdA new MergeAllelesTest(Arrays.asList(delRef), - Arrays.asList(delRef, ATC), - Arrays.asList(delRef, ATC)); + Arrays.asList(delRef, ATC), + Arrays.asList(delRef, ATC)); new MergeAllelesTest(Arrays.asList(delRef), - Arrays.asList(delRef, ATC, ATCATC), - Arrays.asList(delRef, ATC, ATCATC)); + Arrays.asList(delRef, ATC, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); new MergeAllelesTest(Arrays.asList(delRef, ATCATC), - Arrays.asList(delRef, ATC, ATCATC), - Arrays.asList(delRef, ATC, ATCATC)); + Arrays.asList(delRef, ATC, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); new MergeAllelesTest(Arrays.asList(delRef, ATC), - Arrays.asList(delRef, ATCATC), - Arrays.asList(delRef, ATC, ATCATC)); + Arrays.asList(delRef, ATCATC), + Arrays.asList(delRef, ATC, ATCATC)); return MergeAllelesTest.getTests(MergeAllelesTest.class); } @Test(dataProvider = "mergeAlleles") public void testMergeAlleles(MergeAllelesTest cfg) { - final List priority = new ArrayList(); final List inputs = new ArrayList(); int i = 0; for ( final List alleles : cfg.inputs ) { final String name = "vcf" + ++i; - priority.add(name); inputs.add(makeVC(name, alleles)); } + final List priority = vcs2priority(inputs); + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, inputs, priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, @@ -281,87 +286,82 @@ public class VariantContextUtilsUnitTest extends BaseTest { @DataProvider(name = "mergeFiltered") public Object[][] mergeFilteredData() { new MergeFilteredTest("AllPass", - makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - VariantContextUtils.MERGE_INTERSECTION); + makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + VariantContextUtils.MERGE_INTERSECTION); new MergeFilteredTest("noFilters", - makeVC("1", Arrays.asList(Aref, T), "."), - makeVC("2", Arrays.asList(Aref, T), "."), - makeVC("3", Arrays.asList(Aref, T), "."), - VariantContextUtils.MERGE_INTERSECTION); + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "."), + VariantContextUtils.MERGE_INTERSECTION); new MergeFilteredTest("oneFiltered", - makeVC("1", Arrays.asList(Aref, T), "."), - makeVC("2", Arrays.asList(Aref, T), "FAIL"), - makeVC("3", Arrays.asList(Aref, T), "."), - String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), "."), + String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); new MergeFilteredTest("onePassOneFail", - makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - makeVC("2", Arrays.asList(Aref, T), "FAIL"), - makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); new MergeFilteredTest("AllFiltered", - makeVC("1", Arrays.asList(Aref, T), "FAIL"), - makeVC("2", Arrays.asList(Aref, T), "FAIL"), - makeVC("3", Arrays.asList(Aref, T), "FAIL"), - VariantContextUtils.MERGE_FILTER_IN_ALL); + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), "FAIL"), + VariantContextUtils.MERGE_FILTER_IN_ALL); // test ALL vs. ANY new MergeFilteredTest("FailOneUnfiltered", - makeVC("1", Arrays.asList(Aref, T), "FAIL"), - makeVC("2", Arrays.asList(Aref, T), "."), - makeVC("3", Arrays.asList(Aref, T), "."), - VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, - String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "."), + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); new MergeFilteredTest("OneFailAllUnfilteredArg", - makeVC("1", Arrays.asList(Aref, T), "FAIL"), - makeVC("2", Arrays.asList(Aref, T), "."), - makeVC("3", Arrays.asList(Aref, T), "FAIL"), - VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED, - String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); + makeVC("1", Arrays.asList(Aref, T), "FAIL"), + makeVC("2", Arrays.asList(Aref, T), "."), + makeVC("3", Arrays.asList(Aref, T), "FAIL"), + VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED, + String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); // test excluding allele in filtered record new MergeFilteredTest("DontIncludeAlleleOfFilteredRecords", - makeVC("1", Arrays.asList(Aref, T), "."), - makeVC("2", Arrays.asList(Aref, T), "FAIL"), - makeVC("3", Arrays.asList(Aref, T), "."), - String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), "FAIL"), + makeVC("3", Arrays.asList(Aref, T), "."), + String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); // promotion of site from unfiltered to PASSES new MergeFilteredTest("UnfilteredPlusPassIsPass", - makeVC("1", Arrays.asList(Aref, T), "."), - makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - VariantContextUtils.MERGE_INTERSECTION); + makeVC("1", Arrays.asList(Aref, T), "."), + makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + VariantContextUtils.MERGE_INTERSECTION); new MergeFilteredTest("RefInAll", - makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), - makeVC("2", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), - makeVC("3", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), - VariantContextUtils.MERGE_REF_IN_ALL); + makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + VariantContextUtils.MERGE_REF_IN_ALL); new MergeFilteredTest("RefInOne", - makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), - makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - "2"); + makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), + makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), + "2"); return MergeFilteredTest.getTests(MergeFilteredTest.class); } @Test(dataProvider = "mergeFiltered") public void testMergeFiltered(MergeFilteredTest cfg) { - final List priority = new ArrayList(); - - for ( final VariantContext vc : cfg.inputs ) { - priority.add(vc.getSource()); - } - + final List priority = vcs2priority(cfg.inputs); final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, cfg.inputs, priority, cfg.type, VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); @@ -375,6 +375,148 @@ public class VariantContextUtilsUnitTest extends BaseTest { Assert.assertEquals(merged.getFilters(), cfg.expected.getFilters()); } + // -------------------------------------------------------------------------------- + // + // Test genotype merging + // + // -------------------------------------------------------------------------------- + + private class MergeGenotypesTest extends TestDataProvider { + List inputs; + VariantContext expected; + List priority; + + private MergeGenotypesTest(String name, String priority, VariantContext... arg) { + super(MergeGenotypesTest.class, name); + LinkedList all = new LinkedList(Arrays.asList(arg)); + this.expected = all.pollLast(); + inputs = all; + this.priority = Arrays.asList(priority.split(",")); + } + + public String toString() { + return String.format("%s input=%s expected=%s", super.toString(), inputs, expected); + } + } + + @DataProvider(name = "mergeGenotypes") + public Object[][] mergeGenotypesData() { + new MergeGenotypesTest("TakeGenotypeByPriority-1,2", "1,2", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1))); + + new MergeGenotypesTest("TakeGenotypeByPriority-1,2-nocall", "1,2", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1))); + + new MergeGenotypesTest("TakeGenotypeByPriority-2,1", "2,1", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2))); + + new MergeGenotypesTest("NonOverlappingGenotypes", "1,2", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s2", Aref, T, 2)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1), makeG("s2", Aref, T, 2))); + + new MergeGenotypesTest("PreserveNoCall", "1,2", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s2", Aref, T, 2)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1), makeG("s2", Aref, T, 2))); + + new MergeGenotypesTest("PerserveAlleles", "1,2", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)), + makeVC("2", Arrays.asList(Aref, C), makeG("s2", Aref, C, 2)), + makeVC("3", Arrays.asList(Aref, C, T), makeG("s1", Aref, T, 1), makeG("s2", Aref, C, 2))); + + new MergeGenotypesTest("TakeGenotypePartialOverlap-1,2", "1,2", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1), makeG("s3", Aref, T, 3))); + + new MergeGenotypesTest("TakeGenotypePartialOverlap-2,1", "2,1", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)), + makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)), + makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3))); + + // todo -- GDA -- add tests for merging correct PLs + + return MergeGenotypesTest.getTests(MergeGenotypesTest.class); + } + + // todo -- add test for GenotypeMergeType UNIQUIFY, REQUIRE_UNIQUE + + @Test(dataProvider = "mergeGenotypes") + public void testMergeGenotypes(MergeGenotypesTest cfg) { + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + cfg.inputs, cfg.priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); + + // test alleles are equal + Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles()); + + // test genotypes + assertGenotypesAreMostlyEqual(merged.getGenotypes(), cfg.expected.getGenotypes()); + } + + // necessary to not overload equals for genotypes + private void assertGenotypesAreMostlyEqual(Map actual, Map expected) { + if (actual == expected) { + return; + } + + if (actual == null || expected == null) { + Assert.fail("Maps not equal: expected: " + expected + " and actual: " + actual); + } + + if (actual.size() != expected.size()) { + Assert.fail("Maps do not have the same size:" + actual.size() + " != " + expected.size()); + } + + for (Map.Entry entry : actual.entrySet()) { + String key = entry.getKey(); + Genotype value = entry.getValue(); + Genotype expectedValue = expected.get(key); + + Assert.assertEquals(value.alleles, expectedValue.alleles); + Assert.assertEquals(value.getNegLog10PError(), expectedValue.getNegLog10PError()); + Assert.assertEquals(value.hasLikelihoods(), expectedValue.hasLikelihoods()); + if ( value.hasLikelihoods() ) + Assert.assertEquals(value.getLikelihoods(), expectedValue.getLikelihoods()); + } + } + + @Test + public void testMergeGenotypesUniquify() { + final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)); + final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)); + + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false); + + // test genotypes + Assert.assertEquals(merged.getGenotypes().keySet(), new HashSet(Arrays.asList("s1.1", "s1.2"))); + } + + @Test(expectedExceptions = UserException.class) + public void testMergeGenotypesRequireUnique() { + final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)); + final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)); + + final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE, false, false, "set", false, false); + } + + // -------------------------------------------------------------------------------- + // + // Misc. tests + // + // -------------------------------------------------------------------------------- + @Test public void testAnnotationSet() { for ( final boolean annotate : Arrays.asList(true, false)) { @@ -395,6 +537,13 @@ public class VariantContextUtilsUnitTest extends BaseTest { } } - // todo -- add tests for subset merging, especially with correct PLs - // todo -- test priority list + private static final List vcs2priority(final Collection vcs) { + final List priority = new ArrayList(); + + for ( final VariantContext vc : vcs ) { + priority.add(vc.getSource()); + } + + return priority; + } } From 106a26c42da8780cd33d7186ec0a658830ad6819 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 08:25:20 -0400 Subject: [PATCH 14/20] Minor file cleanup --- .../utils/variantcontext/VariantContextUtilsUnitTest.java | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index c1025a7de..15a5549b2 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -24,18 +24,14 @@ package org.broadinstitute.sting.utils.variantcontext; import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.apache.log4j.Priority; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.testng.Assert; import org.testng.annotations.BeforeSuite; -import org.testng.annotations.Test; import org.testng.annotations.DataProvider; -import org.yaml.snakeyaml.Yaml; +import org.testng.annotations.Test; import java.io.File; import java.io.FileNotFoundException; @@ -446,8 +442,6 @@ public class VariantContextUtilsUnitTest extends BaseTest { return MergeGenotypesTest.getTests(MergeGenotypesTest.class); } - // todo -- add test for GenotypeMergeType UNIQUIFY, REQUIRE_UNIQUE - @Test(dataProvider = "mergeGenotypes") public void testMergeGenotypes(MergeGenotypesTest cfg) { final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, From dfce301bebd62275ad55d326dd2801cbbce47d81 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 09:03:04 -0400 Subject: [PATCH 15/20] Looks for @Hidden annotation on all classes and excludes them from the docs --- .../sting/utils/help/GenericDocumentationHandler.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java index 4f1e95499..7ea77a939 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java @@ -66,7 +66,8 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { public boolean includeInDocs(ClassDoc doc) { try { Class type = HelpUtils.getClassForDoc(doc); - return JVMUtils.isConcrete(type); + boolean hidden = ! getDoclet().showHiddenFeatures() && type.isAnnotationPresent(Hidden.class); + return ! hidden && JVMUtils.isConcrete(type); } catch ( ClassNotFoundException e ) { return false; } From dd65ba5baee8df3b57819e684dd3b63843809836 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 09:03:37 -0400 Subject: [PATCH 16/20] @Hidden for DocumentationTest and GATKDocsExample --- .../org/broadinstitute/sting/gatk/examples/GATKDocsExample.java | 2 ++ .../broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java | 1 + 2 files changed, 3 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java index 4541a0537..db4f477c2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.examples; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.ArgumentCollection; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -59,6 +60,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; * @author Your Name * @since Date created */ +@Hidden public class GATKDocsExample extends RodWalker { /** * Put detailed documentation about the argument here. No need to duplicate the summary information diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java index 933e24784..5b60a9db5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java @@ -42,6 +42,7 @@ import java.util.*; * *

Body test

*/ +@Hidden public class DocumentationTest extends RodWalker { // the docs for the arguments are in the collection @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); From 2bb77a7978881db4ff1d0b57fa7e2afd9a271295 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 09:04:16 -0400 Subject: [PATCH 18/20] Docs for all VariantAnnotator annotations --- .../gatk/walkers/annotator/AlleleBalance.java | 3 + .../annotator/AlleleBalanceBySample.java | 3 + .../walkers/annotator/AnnotationByDepth.java | 5 +- .../gatk/walkers/annotator/BaseCounts.java | 3 + .../annotator/BaseQualityRankSumTest.java | 3 + .../walkers/annotator/ChromosomeCounts.java | 5 + .../walkers/annotator/DepthOfCoverage.java | 7 +- .../annotator/DepthPerAlleleBySample.java | 19 + .../gatk/walkers/annotator/FisherStrand.java | 5 + .../gatk/walkers/annotator/GCContent.java | 3 + .../walkers/annotator/HaplotypeScore.java | 4 + .../gatk/walkers/annotator/HardyWeinberg.java | 3 + .../walkers/annotator/HomopolymerRun.java | 4 +- .../{GLstats.java => InbreedingCoeff.java} | 15 +- .../gatk/walkers/annotator/IndelType.java | 6 +- .../sting/gatk/walkers/annotator/LowMQ.java | 3 + .../annotator/MappingQualityRankSumTest.java | 3 + .../walkers/annotator/MappingQualityZero.java | 3 + .../annotator/MappingQualityZeroBySample.java | 166 ++++--- .../annotator/MappingQualityZeroFraction.java | 5 +- .../gatk/walkers/annotator/NBaseCount.java | 5 +- .../gatk/walkers/annotator/QualByDepth.java | 7 +- .../walkers/annotator/RMSMappingQuality.java | 3 + .../gatk/walkers/annotator/RankSumTest.java | 4 +- .../ReadDepthAndAllelicFractionBySample.java | 416 +++++++++--------- .../walkers/annotator/ReadPosRankSumTest.java | 5 +- .../gatk/walkers/annotator/SBByDepth.java | 5 +- .../gatk/walkers/annotator/SampleList.java | 4 +- .../walkers/annotator/SpanningDeletions.java | 3 + .../annotator/TechnologyComposition.java | 8 +- 30 files changed, 398 insertions(+), 330 deletions(-) rename public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/{GLstats.java => InbreedingCoeff.java} (90%) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index cf68a9121..e501258c5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -43,6 +43,9 @@ import java.util.List; import java.util.Map; +/** + * The allele balance (fraction of ref bases over ref + alt bases) across all bialleleic het-called samples + */ public class AlleleBalance extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java index ddb7ab828..75c4037d5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java @@ -16,6 +16,9 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; +/** + * The allele balance (fraction of ref bases over ref + alt bases) separately for each bialleleic het-called sample + */ public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java index dc41dbc81..353fd1c2c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java @@ -6,8 +6,9 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype; import java.util.Map; - - +/** + * Abstract base class for all annotations that are normalized by depth + */ public abstract class AnnotationByDepth extends InfoFieldAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java index ecfd9b707..46aa6d0f3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java @@ -47,6 +47,9 @@ import java.util.List; import java.util.Map; +/** + * Count of A, C, G, T bases across all samples + */ public class BaseCounts extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java index 2a5c996f7..6cab6d95f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java @@ -13,6 +13,9 @@ import java.util.LinkedHashMap; import java.util.List; +/** + * The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele) + */ public class BaseQualityRankSumTest extends RankSumTest { public List getKeyNames() { return Arrays.asList("BaseQRankSum"); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index ad06dcf52..5ed2a6761 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -44,6 +44,11 @@ import java.util.List; import java.util.Map; +/** + * Allele count in genotypes, for each ALT allele, in the same order as listed; + * allele Frequency, for each ALT allele, in the same order as listed; total number + * of alleles in called genotypes. + */ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation { private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index a4d8db5bd..8826de232 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -16,7 +16,12 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - +/** + * Total (unfiltered) depth over all samples. + * + * Affected by downsampling (-dcov) though, so the max value one can obtain for N samples with -dcov D + * is N * D + */ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 1652c8de7..1cd30c51d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -23,6 +23,25 @@ import java.util.List; import java.util.Map; +/** + * The depth of coverage of each VCF allele in this sample + * + * Complementary fields that two important ways of thinking about the depth of the data for this sample + * at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal + * quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site. + * The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the + * REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the + * power I have to determine the genotype of the sample at this site, while the AD tells me how many times + * I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering + * the reads. If, for example, I believe there really is a an A/T polymorphism at a site, then I would like + * to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would + * normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that + * the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that + * are actually present and correctly left-aligned in the alignments themselves). Because of this fact and + * because the AD includes reads and bases that were filtered by the Unified Genotyper, one should not base + * assumptions about the underlying genotype based on it; instead, the genotype likelihoods (PLs) are what + * determine the genotype calls (see below). + */ public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation { private static String REF_ALLELE = "REF"; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index 0cfca48fa..393eb549c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -43,6 +43,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; +/** + * Phred-scaled p-value using Fisher's Exact Test to detect strand bias (the variation + * being seen on only the forward or only the reverse strand) in the reads? More bias is + * indicative of false positive calls. + */ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation { private static final String FS = "FS"; private static final double MIN_PVALUE = 1E-320; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java index a46473f60..11a64b49f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java @@ -17,6 +17,9 @@ import java.util.List; import java.util.Map; +/** + * The GC content (# GC bases / # all bases) of the reference within 50 bp +/- this site + */ public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index 9af3b8e8e..df6da3b85 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -49,6 +49,10 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; +/** + * Consistency of the site with two (and only two) segregating haplotypes. Higher scores + * are indicative of regions with bad alignments, often leading to artifactual SNP and indel calls. + */ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation { private final static boolean DEBUG = false; private final static int MIN_CONTEXT_WING_SIZE = 10; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java index 045505698..f068ed895 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java @@ -19,6 +19,9 @@ import java.util.List; import java.util.Map; +/** + * Phred-scaled P value of genotype-based (using GT field) test for Hardy-Weinberg test for disequilibrium + */ public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation { private static final int MIN_SAMPLES = 10; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index 463f7a645..197a00243 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -16,7 +16,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - +/** + * Largest contiguous homopolymer run of the variant allele in either direction on the reference. + */ public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnotation { private boolean ANNOTATE_INDELS = true; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java similarity index 90% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java index 5295d6d21..a14007147 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java @@ -17,14 +17,15 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: 5/16/11 - */ -// A set of annotations calculated directly from the GLs -public class GLstats extends InfoFieldAnnotation implements StandardAnnotation { +/** + * Likelihood-based (using PL field) test for the inbreeding among samples. + * + * A continuous generalization of the Hardy-Weinberg test for disequilibrium that works + * well with limited coverage per sample. See the 1000 Genomes Phase I release for + * more information. + */ +public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation { private static final int MIN_SAMPLES = 10; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java index bfede40d2..e0abfcf3c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java @@ -14,11 +14,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; /** - * Created by IntelliJ IDEA. - * User: delangel - * Date: Mar 11, 2011 - * Time: 11:47:33 AM - * To change this template use File | Settings | File Templates. + * Rough category of indel type (insertion, deletion, multi-allelic, other) */ public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java index 09ffe0fb6..753740258 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java @@ -17,6 +17,9 @@ import java.util.List; import java.util.Map; +/** + * Triplet annotation: fraction of MAQP == 0, MAPQ < 10, and count of all mapped reads + */ public class LowMQ extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java index cc62580a9..157c615d7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java @@ -14,6 +14,9 @@ import java.util.LinkedHashMap; import java.util.List; +/** + * The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele) + */ public class MappingQualityRankSumTest extends RankSumTest { public List getKeyNames() { return Arrays.asList("MQRankSum"); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java index f9caae227..3a3efc4e8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java @@ -19,6 +19,9 @@ import java.util.List; import java.util.Map; +/** + * Total count across all samples of mapping quality zero reads + */ public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java index 3d234a1e3..f14d7a8a5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java @@ -1,85 +1,81 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.annotator; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * Created by IntelliJ IDEA. - * User: asivache - * Date: Feb 4, 2011 - * Time: 6:46:25 PM - * To change this template use File | Settings | File Templates. - */ -public class MappingQualityZeroBySample extends GenotypeAnnotation { - public Map annotate(RefMetaDataTracker tracker, - AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) { - if ( g == null || !g.isCalled() ) - return null; - - int mq0 = 0; - ReadBackedPileup pileup = null; - if (vc.isIndel() && context.hasExtendedEventPileup()) - pileup = context.getExtendedEventPileup(); - else if (context.hasBasePileup()) - pileup = context.getBasePileup(); - else return null; - - if (pileup != null) { - for (PileupElement p : pileup ) { - if ( p.getMappingQual() == 0 ) - mq0++; - } - } - Map map = new HashMap(); - map.put(getKeyNames().get(0), String.format("%d", mq0)); - return map; - } - - public List getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); } - - public List getDescriptions() { return Arrays.asList( - new VCFFormatHeaderLine(getKeyNames().get(0), 1, - VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample")); } - - -} +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Count for each sample of mapping quality zero reads + */ +public class MappingQualityZeroBySample extends GenotypeAnnotation { + public Map annotate(RefMetaDataTracker tracker, + AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) { + if ( g == null || !g.isCalled() ) + return null; + + int mq0 = 0; + ReadBackedPileup pileup = null; + if (vc.isIndel() && context.hasExtendedEventPileup()) + pileup = context.getExtendedEventPileup(); + else if (context.hasBasePileup()) + pileup = context.getBasePileup(); + else return null; + + if (pileup != null) { + for (PileupElement p : pileup ) { + if ( p.getMappingQual() == 0 ) + mq0++; + } + } + Map map = new HashMap(); + map.put(getKeyNames().get(0), String.format("%d", mq0)); + return map; + } + + public List getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); } + + public List getDescriptions() { return Arrays.asList( + new VCFFormatHeaderLine(getKeyNames().get(0), 1, + VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample")); } + + +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java index 3e8fe8998..2164537b8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java @@ -17,8 +17,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - - +/** + * Fraction of all reads across samples that have mapping quality zero + */ public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java index 74c562045..891e9ae56 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java @@ -17,11 +17,8 @@ import java.util.List; import java.util.Map; /** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: 5/16/11 + * The number of N bases, counting only SOLiD data */ - public class NBaseCount extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index 9a292c39a..552289309 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -15,7 +16,11 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - +/** + * Variant confidence (given as (AB+BB)/AA from the PLs) / unfiltered depth. + * + * Low scores are indicative of false positive calls and artifacts. + */ public class QualByDepth extends AnnotationByDepth implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index 668129888..40f6d20d3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -21,6 +21,9 @@ import java.util.List; import java.util.Map; +/** + * Root Mean Square of the mapping quality of the reads across all samples. + */ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index 52c704055..93e093248 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -21,7 +21,9 @@ import java.util.List; import java.util.Map; - +/** + * Abstract root for all RankSum based annotations + */ public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation { static final double INDEL_LIKELIHOOD_THRESH = 0.1; static final boolean DEBUG = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java index 26ca08380..772541eb6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java @@ -1,209 +1,207 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.annotator; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; -import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * Created by IntelliJ IDEA. - * User: asivache - * Date: Feb 4, 2011 - * Time: 3:59:27 PM - * To change this template use File | Settings | File Templates. - */ -public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation { - - private static String REF_ALLELE = "REF"; - - private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time - - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, - AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { - if ( g == null || !g.isCalled() ) - return null; - - if ( vc.isSNP() ) - return annotateSNP(stratifiedContext, vc); - if ( vc.isIndel() ) - return annotateIndel(stratifiedContext, vc); - - return null; - } - - private Map annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) { - - if ( ! stratifiedContext.hasBasePileup() ) return null; - - HashMap alleleCounts = new HashMap(); - for ( Allele allele : vc.getAlternateAlleles() ) - alleleCounts.put(allele.getBases()[0], 0); - - ReadBackedPileup pileup = stratifiedContext.getBasePileup(); - int totalDepth = pileup.size(); - - Map map = new HashMap(); - map.put(getKeyNames().get(0), totalDepth); // put total depth in right away - - if ( totalDepth == 0 ) return map; // done, can not compute FA at 0 coverage!! - - int mq0 = 0; // number of "ref" reads that are acually mq0 - for ( PileupElement p : pileup ) { - if ( p.getMappingQual() == 0 ) { - mq0++; - continue; - } - if ( alleleCounts.containsKey(p.getBase()) ) // non-mq0 read and it's an alt - alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+1); - } - - if ( mq0 == totalDepth ) return map; // if all reads are mq0, there is nothing left to do - - // we need to add counts in the correct order - String[] fracs = new String[alleleCounts.size()]; - for (int i = 0; i < vc.getAlternateAlleles().size(); i++) { - fracs[i] = String.format("%.3f", ((float)alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]))/(totalDepth-mq0)); - } - - map.put(getKeyNames().get(1), fracs); - return map; - } - - private Map annotateIndel(AlignmentContext - stratifiedContext, VariantContext - vc) { - - if ( ! stratifiedContext.hasExtendedEventPileup() ) { - return null; - } - - ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup(); - if ( pileup == null ) - return null; - int totalDepth = pileup.size(); - - Map map = new HashMap(); - map.put(getKeyNames().get(0), totalDepth); // put total depth in right away - - if ( totalDepth == 0 ) return map; - int mq0 = 0; // number of "ref" reads that are acually mq0 - - HashMap alleleCounts = new HashMap(); - Allele refAllele = vc.getReference(); - - for ( Allele allele : vc.getAlternateAlleles() ) { - - if ( allele.isNoCall() ) { - continue; // this does not look so good, should we die??? - } - - alleleCounts.put(getAlleleRepresentation(allele), 0); - } - - for ( ExtendedEventPileupElement e : pileup.toExtendedIterable() ) { - - if ( e.getMappingQual() == 0 ) { - mq0++; - continue; - } - - if ( e.isInsertion() ) { - - final String b = e.getEventBases(); - if ( alleleCounts.containsKey(b) ) { - alleleCounts.put(b, alleleCounts.get(b)+1); - } - - } else { - if ( e.isDeletion() ) { - if ( e.getEventLength() == refAllele.length() ) { - // this is indeed the deletion allele recorded in VC - final String b = DEL; - if ( alleleCounts.containsKey(b) ) { - alleleCounts.put(b, alleleCounts.get(b)+1); - } - } -// else { -// System.out.print(" deletion of WRONG length found"); -// } - } - } - } - - if ( mq0 == totalDepth ) return map; - - String[] fracs = new String[alleleCounts.size()]; - for (int i = 0; i < vc.getAlternateAlleles().size(); i++) - fracs[i] = String.format("%.3f", - ((float)alleleCounts.get(getAlleleRepresentation(vc.getAlternateAllele(i))))/(totalDepth-mq0)); - - map.put(getKeyNames().get(1), fracs); - - //map.put(getKeyNames().get(0), counts); - return map; - } - - private String getAlleleRepresentation(Allele allele) { - if ( allele.isNull() ) { // deletion wrt the ref - return DEL; - } else { // insertion, pass actual bases - return allele.getBaseString(); - } - - } - - // public String getIndelBases() - public List getKeyNames() { return Arrays.asList("DP","FA"); } - - public List getDescriptions() { - return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), - 1, - VCFHeaderLineType.Integer, - "Total read depth per sample, including MQ0"), - new VCFFormatHeaderLine(getKeyNames().get(1), - VCFHeaderLineCount.UNBOUNDED, - VCFHeaderLineType.Float, - "Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample")); - } -} +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Unsupported + */ +@Hidden +public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation { + + private static String REF_ALLELE = "REF"; + + private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time + + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, + AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { + if ( g == null || !g.isCalled() ) + return null; + + if ( vc.isSNP() ) + return annotateSNP(stratifiedContext, vc); + if ( vc.isIndel() ) + return annotateIndel(stratifiedContext, vc); + + return null; + } + + private Map annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) { + + if ( ! stratifiedContext.hasBasePileup() ) return null; + + HashMap alleleCounts = new HashMap(); + for ( Allele allele : vc.getAlternateAlleles() ) + alleleCounts.put(allele.getBases()[0], 0); + + ReadBackedPileup pileup = stratifiedContext.getBasePileup(); + int totalDepth = pileup.size(); + + Map map = new HashMap(); + map.put(getKeyNames().get(0), totalDepth); // put total depth in right away + + if ( totalDepth == 0 ) return map; // done, can not compute FA at 0 coverage!! + + int mq0 = 0; // number of "ref" reads that are acually mq0 + for ( PileupElement p : pileup ) { + if ( p.getMappingQual() == 0 ) { + mq0++; + continue; + } + if ( alleleCounts.containsKey(p.getBase()) ) // non-mq0 read and it's an alt + alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+1); + } + + if ( mq0 == totalDepth ) return map; // if all reads are mq0, there is nothing left to do + + // we need to add counts in the correct order + String[] fracs = new String[alleleCounts.size()]; + for (int i = 0; i < vc.getAlternateAlleles().size(); i++) { + fracs[i] = String.format("%.3f", ((float)alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]))/(totalDepth-mq0)); + } + + map.put(getKeyNames().get(1), fracs); + return map; + } + + private Map annotateIndel(AlignmentContext + stratifiedContext, VariantContext + vc) { + + if ( ! stratifiedContext.hasExtendedEventPileup() ) { + return null; + } + + ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup(); + if ( pileup == null ) + return null; + int totalDepth = pileup.size(); + + Map map = new HashMap(); + map.put(getKeyNames().get(0), totalDepth); // put total depth in right away + + if ( totalDepth == 0 ) return map; + int mq0 = 0; // number of "ref" reads that are acually mq0 + + HashMap alleleCounts = new HashMap(); + Allele refAllele = vc.getReference(); + + for ( Allele allele : vc.getAlternateAlleles() ) { + + if ( allele.isNoCall() ) { + continue; // this does not look so good, should we die??? + } + + alleleCounts.put(getAlleleRepresentation(allele), 0); + } + + for ( ExtendedEventPileupElement e : pileup.toExtendedIterable() ) { + + if ( e.getMappingQual() == 0 ) { + mq0++; + continue; + } + + if ( e.isInsertion() ) { + + final String b = e.getEventBases(); + if ( alleleCounts.containsKey(b) ) { + alleleCounts.put(b, alleleCounts.get(b)+1); + } + + } else { + if ( e.isDeletion() ) { + if ( e.getEventLength() == refAllele.length() ) { + // this is indeed the deletion allele recorded in VC + final String b = DEL; + if ( alleleCounts.containsKey(b) ) { + alleleCounts.put(b, alleleCounts.get(b)+1); + } + } +// else { +// System.out.print(" deletion of WRONG length found"); +// } + } + } + } + + if ( mq0 == totalDepth ) return map; + + String[] fracs = new String[alleleCounts.size()]; + for (int i = 0; i < vc.getAlternateAlleles().size(); i++) + fracs[i] = String.format("%.3f", + ((float)alleleCounts.get(getAlleleRepresentation(vc.getAlternateAllele(i))))/(totalDepth-mq0)); + + map.put(getKeyNames().get(1), fracs); + + //map.put(getKeyNames().get(0), counts); + return map; + } + + private String getAlleleRepresentation(Allele allele) { + if ( allele.isNull() ) { // deletion wrt the ref + return DEL; + } else { // insertion, pass actual bases + return allele.getBaseString(); + } + + } + + // public String getIndelBases() + public List getKeyNames() { return Arrays.asList("DP","FA"); } + + public List getDescriptions() { + return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), + 1, + VCFHeaderLineType.Integer, + "Total read depth per sample, including MQ0"), + new VCFFormatHeaderLine(getKeyNames().get(1), + VCFHeaderLineCount.UNBOUNDED, + VCFHeaderLineType.Float, + "Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample")); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java index aabfb2970..27a9306d4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java @@ -19,11 +19,8 @@ import java.util.LinkedHashMap; import java.util.List; /** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: 3/30/11 + * The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele; if the alternate allele is only seen near the ends of reads this is indicative of error). */ - public class ReadPosRankSumTest extends RankSumTest { public List getKeyNames() { return Arrays.asList("ReadPosRankSum"); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java index 180bed24d..efe96f226 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java @@ -15,8 +15,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - - +/** + * SB annotation value by depth of alt containing samples + */ public class SBByDepth extends AnnotationByDepth { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java index cd396036f..ff409484d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java @@ -41,7 +41,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - +/** + * List all of the samples in the info field + */ public class SampleList extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index 42203824f..f747fbc2e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -17,6 +17,9 @@ import java.util.List; import java.util.Map; +/** + * Fraction of reads containing spanning deletions at this site. + */ public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java index fa48c57a3..1f5508f4c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -19,12 +20,9 @@ import java.util.List; import java.util.Map; /** - * Created by IntelliJ IDEA. - * User: delangel - * Date: 6/29/11 - * Time: 3:14 PM - * To change this template use File | Settings | File Templates. + * Counts of bases from SLX, 454, and SOLiD at this site */ +@Hidden public class TechnologyComposition extends InfoFieldAnnotation implements ExperimentalAnnotation { private String nSLX = "NumSLX"; private String n454 ="Num454"; From e3d4efb2834b6aa5049a36b3e7ad4ca9458140d2 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 23 Sep 2011 11:55:21 -0400 Subject: [PATCH 20/20] Remove N2 EXACT model code, which should never be used --- .../genotyper/ExactAFCalculationModel.java | 251 +----------------- .../genotyper/UnifiedArgumentCollection.java | 5 - 2 files changed, 9 insertions(+), 247 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 6ae437b27..8a3a97823 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -48,27 +48,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // code for testing purposes // private final static boolean DEBUG = false; - private final static boolean PRINT_LIKELIHOODS = false; - private final static int N_CYCLES = 1; - private SimpleTimer timerExpt = new SimpleTimer("linearExactBanded"); - private SimpleTimer timerGS = new SimpleTimer("linearExactGS"); - private final static boolean COMPARE_TO_GS = false; - - public enum ExactCalculation { - N2_GOLD_STANDARD, - LINEAR_EXPERIMENTAL - } - private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 - - private boolean SIMPLE_GREEDY_GENOTYPER = false; - + private final boolean SIMPLE_GREEDY_GENOTYPER = false; private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. - final private ExactCalculation calcToUse; protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { super(UAC, N, logger, verboseWriter); - calcToUse = UAC.EXACT_CALCULATION_TYPE; } public void getLog10PNonRef(RefMetaDataTracker tracker, @@ -76,43 +61,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { Map GLs, Setalleles, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors) { - // todo -- REMOVE ME AFTER TESTING - // todo -- REMOVE ME AFTER TESTING - // todo -- REMOVE ME AFTER TESTING - double[] gsPosteriors; - if ( COMPARE_TO_GS ) // due to annoying special values in incoming array, we have to clone up here - gsPosteriors = log10AlleleFrequencyPosteriors.clone(); - - int idxAA = GenotypeType.AA.ordinal(); - int idxAB = GenotypeType.AB.ordinal(); - int idxBB = GenotypeType.BB.ordinal(); - - // todo -- remove me after testing - if ( N_CYCLES > 1 ) { - for ( int i = 0; i < N_CYCLES; i++) { - timerGS.restart(); - linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone(), idxAA, idxAB, idxBB); - timerGS.stop(); - - timerExpt.restart(); - linearExactBanded(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone()); - timerExpt.stop(); - } - - System.out.printf("good = %.2f, expt = %.2f, delta = %.2f%n", - timerGS.getElapsedTime(), timerExpt.getElapsedTime(), timerExpt.getElapsedTime()-timerGS.getElapsedTime()); - } - - int lastK = -1; - - int numAlleles = alleles.size(); + final int numAlleles = alleles.size(); + final double[][] posteriorCache = numAlleles > 2 ? new double[numAlleles-1][] : null; + final double[] bestAFguess = numAlleles > 2 ? new double[numAlleles-1] : null; int idxDiag = numAlleles; int incr = numAlleles - 1; - - double[][] posteriorCache = new double[numAlleles-1][]; - double[] bestAFguess = new double[numAlleles-1]; - for (int k=1; k < numAlleles; k++) { // multi-allelic approximation, part 1: Ideally // for each alt allele compute marginal (suboptimal) posteriors - @@ -121,24 +75,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC. // 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD - idxAA = 0; - idxAB = k; + final int idxAA = 0; + final int idxAB = k; // yy is always element on the diagonal. // 2 alleles: BBelement 2 // 3 alleles: BB element 3. CC element 5 // 4 alleles: - idxBB = idxDiag; + final int idxBB = idxDiag; idxDiag += incr--; - // todo - possible cleanup - switch ( calcToUse ) { - case N2_GOLD_STANDARD: - lastK = gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB); - break; - case LINEAR_EXPERIMENTAL: - lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB); - break; - } + final int lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB); + if (numAlleles > 2) { posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone(); bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors); @@ -153,39 +100,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]); } - // todo -- REMOVE ME AFTER TESTING - // todo -- REMOVE ME AFTER TESTING - // todo -- REMOVE ME AFTER TESTING - if ( COMPARE_TO_GS ) { - gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, gsPosteriors, idxAA, idxAB, idxBB); - - double log10thisPVar = Math.log10(MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors)[0]); - double log10gsPVar = Math.log10(MathUtils.normalizeFromLog10(gsPosteriors)[0]); - boolean eq = (log10thisPVar == Double.NEGATIVE_INFINITY && log10gsPVar == Double.NEGATIVE_INFINITY) || MathUtils.compareDoubles(log10thisPVar, log10gsPVar, 1e-4) == 0; - - if ( ! eq || PRINT_LIKELIHOODS ) { - System.out.printf("----------------------------------------%n"); - for (int k=0; k < log10AlleleFrequencyPosteriors.length; k++) { - double x = log10AlleleFrequencyPosteriors[k]; - System.out.printf(" %d\t%.2f\t%.2f\t%b%n", k, - x < -1e10 ? Double.NEGATIVE_INFINITY : x, gsPosteriors[k], - log10AlleleFrequencyPosteriors[k] == gsPosteriors[k]); - } - System.out.printf("MAD_AC\t%d\t%d\t%.2f\t%.2f\t%.6f%n", - ref.getLocus().getStart(), lastK, log10thisPVar, log10gsPVar, log10thisPVar - log10gsPVar); - } - } - } private static final ArrayList getGLs(Map GLs) { ArrayList genotypeLikelihoods = new ArrayList(); - //int j = 0; genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy for ( Genotype sample : GLs.values() ) { if ( sample.hasLikelihoods() ) { - //double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods()); double[] gls = sample.getLikelihoods().getAsVector(); if (MathUtils.sum(gls) < SUM_GL_THRESH_NOCALL) @@ -240,84 +162,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } } - // now with banding - public int linearExactBanded(Map GLs, - double[] log10AlleleFrequencyPriors, - double[] log10AlleleFrequencyPosteriors) { - throw new NotImplementedException(); -// final int numSamples = GLs.size(); -// final int numChr = 2*numSamples; -// final double[][] genotypeLikelihoods = getGLs(GLs); -// -// final ExactACCache logY = new ExactACCache(numSamples+1); -// logY.getkMinus0()[0] = 0.0; // the zero case -// -// double maxLog10L = Double.NEGATIVE_INFINITY; -// boolean done = false; -// int lastK = -1; -// final int BAND_SIZE = 10; -// -// for (int k=0; k <= numChr && ! done; k++ ) { -// final double[] kMinus0 = logY.getkMinus0(); -// int jStart = Math.max(k - BAND_SIZE, 1); -// int jStop = Math.min(k + BAND_SIZE, numSamples); -// -// if ( k == 0 ) { // special case for k = 0 -// for ( int j=1; j <= numSamples; j++ ) { -// kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][GenotypeType.AA.ordinal()]; -// } -// } else { // k > 0 -// final double[] kMinus1 = logY.getkMinus1(); -// final double[] kMinus2 = logY.getkMinus2(); -// Arrays.fill(kMinus0,0); -// -// for ( int j = jStart; j <= jStop; j++ ) { -// final double[] gl = genotypeLikelihoods[j]; -// final double logDenominator = log10Cache[2*j] + log10Cache[2*j-1]; -// -// double aa = Double.NEGATIVE_INFINITY; -// double ab = Double.NEGATIVE_INFINITY; -// if (k < 2*j-1) -// aa = log10Cache[2*j-k] + log10Cache[2*j-k-1] + kMinus0[j-1] + gl[GenotypeType.AA.ordinal()]; -// -// if (k < 2*j) -// ab = log10Cache[2*k] + log10Cache[2*j-k]+ kMinus1[j-1] + gl[GenotypeType.AB.ordinal()]; -// -// double log10Max; -// if (k > 1) { -// final double bb = log10Cache[k] + log10Cache[k-1] + kMinus2[j-1] + gl[GenotypeType.BB.ordinal()]; -// log10Max = approximateLog10SumLog10(aa, ab, bb); -// } else { -// // we know we aren't considering the BB case, so we can use an optimized log10 function -// log10Max = approximateLog10SumLog10(aa, ab); -// } -// -// // finally, update the L(j,k) value -// kMinus0[j] = log10Max - logDenominator; -// -// String offset = Utils.dupString(' ',k); -// System.out.printf("%s%3d %3d %.2f%n", offset, k, j, kMinus0[j]); -// } -// } -// -// // update the posteriors vector -// final double log10LofK = kMinus0[jStop]; -// log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k]; -// -// // can we abort early? -// lastK = k; -// maxLog10L = Math.max(maxLog10L, log10LofK); -// if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { -// if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L); -// done = true; -// } -// -// logY.rotate(); -// } -// -// return lastK; - } - public int linearExact(Map GLs, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) { @@ -605,82 +449,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { return calls; } - // ------------------------------------------------------------------------------------- - // - // Gold standard, but O(N^2), implementation. - // - // TODO -- remove me for clarity in this code - // - // ------------------------------------------------------------------------------------- - public int gdaN2GoldStandard(Map GLs, - double[] log10AlleleFrequencyPriors, - double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) { - int numSamples = GLs.size(); - int numChr = 2*numSamples; - - double[][] logYMatrix = new double[1+numSamples][1+numChr]; - - for (int i=0; i <=numSamples; i++) - for (int j=0; j <=numChr; j++) - logYMatrix[i][j] = Double.NEGATIVE_INFINITY; - - //YMatrix[0][0] = 1.0; - logYMatrix[0][0] = 0.0; - int j=0; - - for ( Map.Entry sample : GLs.entrySet() ) { - j++; - - if ( !sample.getValue().hasLikelihoods() ) - continue; - - //double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods()); - double[] genotypeLikelihoods = sample.getValue().getLikelihoods().getAsVector(); - //double logDenominator = Math.log10(2.0*j*(2.0*j-1)); - double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1]; - - // special treatment for k=0: iteration reduces to: - //YMatrix[j][0] = YMatrix[j-1][0]*genotypeLikelihoods[GenotypeType.AA.ordinal()]; - logYMatrix[j][0] = logYMatrix[j-1][0] + genotypeLikelihoods[idxAA]; - - for (int k=1; k <= 2*j; k++ ) { - - //double num = (2.0*j-k)*(2.0*j-k-1)*YMatrix[j-1][k] * genotypeLikelihoods[GenotypeType.AA.ordinal()]; - double logNumerator[]; - logNumerator = new double[3]; - if (k < 2*j-1) - logNumerator[0] = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + logYMatrix[j-1][k] + - genotypeLikelihoods[idxAA]; - else - logNumerator[0] = Double.NEGATIVE_INFINITY; - - - if (k < 2*j) - logNumerator[1] = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ logYMatrix[j-1][k-1] + - genotypeLikelihoods[idxAB]; - else - logNumerator[1] = Double.NEGATIVE_INFINITY; - - if (k > 1) - logNumerator[2] = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + logYMatrix[j-1][k-2] + - genotypeLikelihoods[idxBB]; - else - logNumerator[2] = Double.NEGATIVE_INFINITY; - - double logNum = MathUtils.softMax(logNumerator); - - //YMatrix[j][k] = num/den; - logYMatrix[j][k] = logNum - logDenominator; - } - - } - - for (int k=0; k <= numChr; k++) - log10AlleleFrequencyPosteriors[k] = logYMatrix[j][k] + log10AlleleFrequencyPriors[k]; - - return numChr; - } - private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) { int j = logYMatrix.length - 1; System.out.printf("-----------------------------------%n"); @@ -689,5 +457,4 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior); } } - } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 7b8045581..3c8fd4451 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -168,10 +168,6 @@ public class UnifiedArgumentCollection { @Argument(fullName = "GSA_PRODUCTION_ONLY", shortName = "GSA_PRODUCTION_ONLY", doc = "don't ever use me", required = false) public boolean GSA_PRODUCTION_ONLY = false; - @Hidden - @Argument(fullName = "exactCalculation", shortName = "exactCalculation", doc = "expt", required = false) - public ExactAFCalculationModel.ExactCalculation EXACT_CALCULATION_TYPE = ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL; - @Hidden @Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false) public boolean IGNORE_SNP_ALLELES = false; @@ -191,7 +187,6 @@ public class UnifiedArgumentCollection { uac.GLmodel = GLmodel; uac.AFmodel = AFmodel; - uac.EXACT_CALCULATION_TYPE = EXACT_CALCULATION_TYPE; uac.heterozygosity = heterozygosity; uac.PCR_error = PCR_error; uac.GenotypingMode = GenotypingMode;