From 24b72334b3b30da8384f2ffcf03ab673502c2320 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Tue, 11 Oct 2011 11:32:27 -0400 Subject: [PATCH 1/5] UnifiedGenotyper now correctly initializes the VariantAnnotator engine. This allows the annotation classes to perform any necessary initialization/validation. For example, it allows the SnpEff annotator to (among other things) validate its rod binding. This will prevent a NullPointerException when SnpEff annotation is requested but no rod binding is present. Added an integration test to cover this case so that it doesn't break again. --- .../gatk/walkers/genotyper/UnifiedGenotyper.java | 8 +++++++- .../genotyper/UnifiedGenotyperIntegrationTest.java | 12 ++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 428f97e2a..9fdf65015 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -214,7 +214,13 @@ public class UnifiedGenotyper extends LocusWalker headerInfo = getHeaderInfo(); + + // invoke initialize() method on each of the annotation classes, allowing them to add their own header lines + // and perform any necessary initialization/validation steps + annotationEngine.invokeAnnotationInitializationMethods(headerInfo); + + writer.writeHeader(new VCFHeader(headerInfo, samples)); } private Set getHeaderInfo() { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 41496bdf1..7ef75ec53 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ 
b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.annotations.Test; import java.io.File; @@ -285,6 +286,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { } - - + @Test + public void testSnpEffAnnotationRequestedWithoutRodBinding() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000 " + + "-A SnpEff", + 1, + UserException.class); + executeTest("testSnpEffAnnotationRequestedWithoutRodBinding", spec); + } } From 794f2758710eb1d26cb43c757c9aedc0aafcedbe Mon Sep 17 00:00:00 2001 From: David Roazen Date: Tue, 11 Oct 2011 12:08:56 -0400 Subject: [PATCH 2/5] SnpEff is now marked as a RodRequiringAnnotation instead of an ExperimentalAnnotation. Having SnpEff grouped with the Experimental annotations was proving problematic, since it requires a rod. Placing it in its own group should improve the situation somewhat, making it easier to request "all annotations except for SnpEff". 
--- .../broadinstitute/sting/gatk/walkers/annotator/SnpEff.java | 4 ++-- .../walkers/annotator/interfaces/RodRequiringAnnotation.java | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/RodRequiringAnnotation.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java index 973b3277d..85977bf8e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -31,8 +31,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -50,7 +50,7 @@ import java.util.*; * * @author David Roazen */ -public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotation { +public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotation { private static Logger logger = Logger.getLogger(SnpEff.class); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/RodRequiringAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/RodRequiringAnnotation.java new file mode 100644 index 000000000..9de3ca87c --- /dev/null +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/RodRequiringAnnotation.java @@ -0,0 +1,3 @@ +package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; + +public interface RodRequiringAnnotation extends AnnotationType {} \ No newline at end of file From a2733a451f065b2c41fea282e63b9f05401d87cd Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 11 Oct 2011 19:31:45 -0400 Subject: [PATCH 3/5] Added NotCalled feature to GAV Added "not called" and "no status" to the truth table. Very useful. --- .../validation/GenotypeAndValidateWalker.java | 62 ++++++++++++++----- 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java index f416e94a0..c2f6e2099 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java @@ -39,7 +39,6 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.MutableVariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; @@ -266,8 +265,13 @@ public class GenotypeAndValidateWalker extends RodWalker 0 && context.getBasePileup().getBases().length < minDepth)) { counter.nUncovered = 1L; + if (vcComp.getAttribute("GV").equals("T")) + counter.nAltNotCalled = 1L; + else if (vcComp.getAttribute("GV").equals("F")) + counter.nRefNotCalled = 1L; + else + 
counter.nNoStatusNotCalled = 1L; + return counter; } @@ -382,7 +398,7 @@ public class GenotypeAndValidateWalker extends RodWalker 0) ? 100 * ((double) reduceSum.nRefCalledRef /( reduceSum.nRefCalledRef + reduceSum.nRefCalledAlt)) : 100; logger.info(String.format("Resulting Truth Table Output\n\n" + - "---------------------------------------------------\n" + - "\t\t|\tALT\t|\tREF\t\n" + - "---------------------------------------------------\n" + - "called alt\t|\t%d\t|\t%d\n" + - "called ref\t|\t%d\t|\t%d\n" + - "---------------------------------------------------\n" + + "------------------------------------------------------------------\n" + + "\t\t|\tALT\t|\tREF\t|\tNo Status\n" + + "------------------------------------------------------------------\n" + + "called alt\t|\t%d\t|\t%d\t|\t%d\n" + + "called ref\t|\t%d\t|\t%d\t|\t%d\n" + + "not called\t|\t%d\t|\t%d\t|\t%d\n" + + "------------------------------------------------------------------\n" + "positive predictive value: %f%%\n" + "negative predictive value: %f%%\n" + - "---------------------------------------------------\n" + + "------------------------------------------------------------------\n" + "sensitivity: %f%%\n" + "specificity: %f%%\n" + - "---------------------------------------------------\n" + + "------------------------------------------------------------------\n" + "not confident: %d\n" + "not covered: %d\n" + - "---------------------------------------------------\n", reduceSum.nAltCalledAlt, reduceSum.nRefCalledAlt, reduceSum.nAltCalledRef, reduceSum.nRefCalledRef, ppv, npv, sensitivity, specificity, reduceSum.nNotConfidentCalls, reduceSum.nUncovered)); + "------------------------------------------------------------------\n", reduceSum.nAltCalledAlt, reduceSum.nRefCalledAlt, reduceSum.nNoStatusCalledAlt, reduceSum.nAltCalledRef, reduceSum.nRefCalledRef, reduceSum.nNoStatusCalledRef, reduceSum.nAltNotCalled, reduceSum.nRefNotCalled, reduceSum.nNoStatusNotCalled, ppv, npv, sensitivity, specificity, 
reduceSum.nNotConfidentCalls, reduceSum.nUncovered)); } } From e53a952aeb10afca4bf863cb5cc9bb9e78d3ce14 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 12 Oct 2011 01:57:02 -0400 Subject: [PATCH 4/5] Added ION Torrent support to CountCovariates. --- .../sting/gatk/walkers/recalibration/CycleCovariate.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java index 5d07922a7..e117454f9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java @@ -164,6 +164,7 @@ public class CycleCovariate implements StandardCovariate { private static List LS454_NAMES = Arrays.asList("454"); private static List COMPLETE_GENOMICS_NAMES = Arrays.asList("COMPLETE"); private static List PACBIO_NAMES = Arrays.asList("PACBIO"); + private static List ION_TORRENT_NAMES = Arrays.asList("IONTORRENT"); private static boolean isPlatform(SAMRecord read, List names) { String pl = read.getReadGroup().getPlatform().toUpperCase(); @@ -224,10 +225,10 @@ public class CycleCovariate implements StandardCovariate { } //----------------------------- - // 454 + // 454 and Ion Torrent //----------------------------- - else if ( isPlatform(read, LS454_NAMES) ) { // Some bams have "LS454" and others have just "454" + else if ( isPlatform(read, LS454_NAMES) || isPlatform(read, ION_TORRENT_NAMES)) { // Some bams have "LS454" and others have just "454" final int readLength = read.getReadLength(); final byte[] bases = read.getReadBases(); From 9aecd504735fc687493cb5ee4a3c7d5a1d253705 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 12 Oct 2011 15:44:54 -0400 Subject: [PATCH 5/5] Adding ability to exclude annotations from the VA and UG lists. 
As described in the docs, this argument trumps all others (including -all) so that we can get around the SnpEff issue brought up by Menachem. Added integration test for it. --- .../walkers/annotator/VariantAnnotator.java | 14 +++++++-- .../annotator/VariantAnnotatorEngine.java | 29 ++++++++++++++++--- .../walkers/genotyper/UnifiedGenotyper.java | 9 +++++- .../VariantAnnotatorIntegrationTest.java | 26 +++++++++++------ 4 files changed, 62 insertions(+), 16 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 3be87da80..722326018 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -132,6 +132,13 @@ public class VariantAnnotator extends RodWalker implements Ann @Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false) protected List annotationsToUse = new ArrayList(); + /** + * Note that this argument has higher priority than the -A or -G arguments, + * so annotations will be excluded even if they are explicitly included with the other options. + */ + @Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false) + protected List annotationsToExclude = new ArrayList(); + /** * See the -list argument to view available groups. */ @@ -148,6 +155,9 @@ public class VariantAnnotator extends RodWalker implements Ann @Argument(fullName="expression", shortName="E", doc="One or more specific expressions to apply to variant calls; see documentation for more details", required=false) protected List expressionsToUse = new ArrayList(); + /** + * Note that the -XA argument can be used along with this one to exclude annotations.
+ */ @Argument(fullName="useAllAnnotations", shortName="all", doc="Use all possible annotations (not for the faint of heart)", required=false) protected Boolean USE_ALL_ANNOTATIONS = false; @@ -209,9 +219,9 @@ public class VariantAnnotator extends RodWalker implements Ann } if ( USE_ALL_ANNOTATIONS ) - engine = new VariantAnnotatorEngine(this, getToolkit()); + engine = new VariantAnnotatorEngine(annotationsToExclude, this, getToolkit()); else - engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this, getToolkit()); + engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, annotationsToExclude, this, getToolkit()); engine.initializeExpressions(expressionsToUse); // setup the header fields diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index e5effe6d8..e4bc0d5d5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -73,19 +73,20 @@ public class VariantAnnotatorEngine { } // use this constructor if you want all possible annotations - public VariantAnnotatorEngine(AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) { + public VariantAnnotatorEngine(List annotationsToExclude, AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) { this.walker = walker; this.toolkit = toolkit; requestedInfoAnnotations = AnnotationInterfaceManager.createAllInfoFieldAnnotations(); requestedGenotypeAnnotations = AnnotationInterfaceManager.createAllGenotypeAnnotations(); + excludeAnnotations(annotationsToExclude); initializeDBs(); } // use this constructor if you want to select specific annotations (and/or interfaces) - public VariantAnnotatorEngine(List annotationGroupsToUse, List annotationsToUse, AnnotatorCompatibleWalker 
walker, GenomeAnalysisEngine toolkit) { + public VariantAnnotatorEngine(List annotationGroupsToUse, List annotationsToUse, List annotationsToExclude, AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) { this.walker = walker; this.toolkit = toolkit; - initializeAnnotations(annotationGroupsToUse, annotationsToUse); + initializeAnnotations(annotationGroupsToUse, annotationsToUse, annotationsToExclude); initializeDBs(); } @@ -96,10 +97,30 @@ public class VariantAnnotatorEngine { requestedExpressions.add(new VAExpression(expression, walker.getResourceRodBindings())); } - private void initializeAnnotations(List annotationGroupsToUse, List annotationsToUse) { + private void initializeAnnotations(List annotationGroupsToUse, List annotationsToUse, List annotationsToExclude) { AnnotationInterfaceManager.validateAnnotations(annotationGroupsToUse, annotationsToUse); requestedInfoAnnotations = AnnotationInterfaceManager.createInfoFieldAnnotations(annotationGroupsToUse, annotationsToUse); requestedGenotypeAnnotations = AnnotationInterfaceManager.createGenotypeAnnotations(annotationGroupsToUse, annotationsToUse); + excludeAnnotations(annotationsToExclude); + } + + private void excludeAnnotations(List annotationsToExclude) { + if ( annotationsToExclude.size() == 0 ) + return; + + List tempRequestedInfoAnnotations = new ArrayList(requestedInfoAnnotations.size()); + for ( InfoFieldAnnotation annotation : requestedInfoAnnotations ) { + if ( !annotationsToExclude.contains(annotation.getClass().getSimpleName()) ) + tempRequestedInfoAnnotations.add(annotation); + } + requestedInfoAnnotations = tempRequestedInfoAnnotations; + + List tempRequestedGenotypeAnnotations = new ArrayList(requestedGenotypeAnnotations.size()); + for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations ) { + if ( !annotationsToExclude.contains(annotation.getClass().getSimpleName()) ) + tempRequestedGenotypeAnnotations.add(annotation); + } + requestedGenotypeAnnotations = 
tempRequestedGenotypeAnnotations; } private void initializeDBs() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 9fdf65015..72dc217e1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -149,6 +149,13 @@ public class UnifiedGenotyper extends LocusWalker annotationsToUse = new ArrayList(); + /** + * Which annotations to exclude from output in the VCF file. Note that this argument has higher priority than the -A or -G arguments, + * so annotations will be excluded even if they are explicitly included with the other options. + */ + @Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false) + protected List annotationsToExclude = new ArrayList(); + /** * Which groups of annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available groups. */ @@ -210,7 +217,7 @@ public class UnifiedGenotyper extends LocusWalker