Adding the ability to exclude annotations (-XA / --excludeAnnotation) from the VariantAnnotator (VA) and UnifiedGenotyper (UG) annotation lists. As described in the docs, this argument takes priority over all others (including -all), so that we can get around the SnpEff issue brought up by Menachem. Added an integration test for it.

This commit is contained in:
Eric Banks 2011-10-12 15:44:54 -04:00
parent e53a952aeb
commit 9aecd50473
4 changed files with 62 additions and 16 deletions

View File

@ -132,6 +132,13 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
@Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
protected List<String> annotationsToUse = new ArrayList<String>();
/**
* Note that this argument has higher priority than the -A or -G arguments,
* so annotations will be excluded even if they are explicitly included with the other options.
*/
@Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false)
protected List<String> annotationsToExclude = new ArrayList<String>();
/**
* See the -list argument to view available groups.
*/
@ -148,6 +155,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
@Argument(fullName="expression", shortName="E", doc="One or more specific expressions to apply to variant calls; see documentation for more details", required=false)
protected List<String> expressionsToUse = new ArrayList<String>();
/**
* Note that the -XA argument can be used along with this one to exclude annotations.
*/
@Argument(fullName="useAllAnnotations", shortName="all", doc="Use all possible annotations (not for the faint of heart)", required=false)
protected Boolean USE_ALL_ANNOTATIONS = false;
@ -209,9 +219,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
}
if ( USE_ALL_ANNOTATIONS )
engine = new VariantAnnotatorEngine(this, getToolkit());
engine = new VariantAnnotatorEngine(annotationsToExclude, this, getToolkit());
else
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this, getToolkit());
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, annotationsToExclude, this, getToolkit());
engine.initializeExpressions(expressionsToUse);
// setup the header fields

View File

@ -73,19 +73,20 @@ public class VariantAnnotatorEngine {
}
// use this constructor if you want all possible annotations
public VariantAnnotatorEngine(AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) {
public VariantAnnotatorEngine(List<String> annotationsToExclude, AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) {
this.walker = walker;
this.toolkit = toolkit;
requestedInfoAnnotations = AnnotationInterfaceManager.createAllInfoFieldAnnotations();
requestedGenotypeAnnotations = AnnotationInterfaceManager.createAllGenotypeAnnotations();
excludeAnnotations(annotationsToExclude);
initializeDBs();
}
// use this constructor if you want to select specific annotations (and/or interfaces)
public VariantAnnotatorEngine(List<String> annotationGroupsToUse, List<String> annotationsToUse, AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) {
public VariantAnnotatorEngine(List<String> annotationGroupsToUse, List<String> annotationsToUse, List<String> annotationsToExclude, AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) {
this.walker = walker;
this.toolkit = toolkit;
initializeAnnotations(annotationGroupsToUse, annotationsToUse);
initializeAnnotations(annotationGroupsToUse, annotationsToUse, annotationsToExclude);
initializeDBs();
}
@ -96,10 +97,30 @@ public class VariantAnnotatorEngine {
requestedExpressions.add(new VAExpression(expression, walker.getResourceRodBindings()));
}
private void initializeAnnotations(List<String> annotationGroupsToUse, List<String> annotationsToUse) {
private void initializeAnnotations(List<String> annotationGroupsToUse, List<String> annotationsToUse, List<String> annotationsToExclude) {
AnnotationInterfaceManager.validateAnnotations(annotationGroupsToUse, annotationsToUse);
requestedInfoAnnotations = AnnotationInterfaceManager.createInfoFieldAnnotations(annotationGroupsToUse, annotationsToUse);
requestedGenotypeAnnotations = AnnotationInterfaceManager.createGenotypeAnnotations(annotationGroupsToUse, annotationsToUse);
excludeAnnotations(annotationsToExclude);
}
/**
 * Drops every requested annotation whose class simple name is listed for exclusion.
 *
 * Both {@code requestedInfoAnnotations} and {@code requestedGenotypeAnnotations} are replaced
 * with freshly built lists holding only the surviving annotations, in their original order.
 * Matching is an exact comparison against {@code getClass().getSimpleName()}. When the
 * exclusion list is empty, neither field is touched.
 *
 * @param annotationsToExclude simple class names of the annotations to remove
 */
private void excludeAnnotations(List<String> annotationsToExclude) {
    // nothing was asked to be excluded, so keep the current lists as they are
    if ( annotationsToExclude.isEmpty() )
        return;

    // filter the INFO field annotations
    final List<InfoFieldAnnotation> keptInfoAnnotations = new ArrayList<InfoFieldAnnotation>(requestedInfoAnnotations.size());
    for ( final InfoFieldAnnotation infoAnnotation : requestedInfoAnnotations ) {
        final String simpleName = infoAnnotation.getClass().getSimpleName();
        if ( annotationsToExclude.contains(simpleName) )
            continue;
        keptInfoAnnotations.add(infoAnnotation);
    }
    requestedInfoAnnotations = keptInfoAnnotations;

    // filter the genotype annotations the same way
    final List<GenotypeAnnotation> keptGenotypeAnnotations = new ArrayList<GenotypeAnnotation>(requestedGenotypeAnnotations.size());
    for ( final GenotypeAnnotation genotypeAnnotation : requestedGenotypeAnnotations ) {
        final String simpleName = genotypeAnnotation.getClass().getSimpleName();
        if ( annotationsToExclude.contains(simpleName) )
            continue;
        keptGenotypeAnnotations.add(genotypeAnnotation);
    }
    requestedGenotypeAnnotations = keptGenotypeAnnotations;
}
private void initializeDBs() {

View File

@ -149,6 +149,13 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
@Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
protected List<String> annotationsToUse = new ArrayList<String>();
/**
* Which annotations to exclude from output in the VCF file. Note that this argument has higher priority than the -A or -G arguments,
* so annotations will be excluded even if they are explicitly included with the other options.
*/
@Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false)
protected List<String> annotationsToExclude = new ArrayList<String>();
/**
* Which groups of annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available groups.
*/
@ -210,7 +217,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
if ( verboseWriter != null )
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tAFposterior\tNormalizedPosterior");
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, this, getToolkit());
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples);
// initialize the header

View File

@ -31,7 +31,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("8e7de435105499cd71ffc099e268a83e"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -39,7 +39,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("64b6804cb1e27826e3a47089349be581"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -63,7 +63,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("fd1ffb669800c2e07df1e2719aa38e49"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -71,15 +71,23 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("09f8e840770a9411ff77508e0ed0837f"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
/**
 * Runs the annotator with the Standard group requested while FisherStrand and
 * ReadPosRankSumTest are excluded via -XA, and checks the run against the recorded MD5.
 */
@Test
public void testExcludeAnnotations() {
    // the two -XA flags are expected to win over the -G Standard inclusion
    final String commandLine = baseTestString()
            + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant:VCF3 "
            + validationDataLocation + "vcfexample2empty.vcf -I "
            + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000";

    final WalkerTestSpec spec = new WalkerTestSpec(commandLine, 1, Arrays.asList("b49fe03aa4b675db80a9db38a3552c95"));
    executeTest("test exclude annotations", spec);
}
@Test
public void testOverwritingHeader() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" --variant:VCF " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
baseTestString() + " -G Standard --variant:VCF " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
Arrays.asList("78d2c19f8107d865970dbaf3e12edd92"));
executeTest("test overwriting header", spec);
}
@ -87,7 +95,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testNoReads() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
Arrays.asList("16e3a1403fc376320d7c69492cad9345"));
executeTest("not passing it any reads", spec);
}
@ -103,7 +111,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testDBTagWithHapMap() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --comp:H3 " + validationDataLocation + "fakeHM3.vcf -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
baseTestString() + " --comp:H3 " + validationDataLocation + "fakeHM3.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
Arrays.asList("1bc01c5b3bd0b7aef75230310c3ce688"));
executeTest("getting DB tag with HM3", spec);
}
@ -111,7 +119,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testUsingExpression() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variant", 1,
baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variant", 1,
Arrays.asList("e9c0d832dc6b4ed06c955060f830c140"));
executeTest("using expression", spec);
}
@ -121,7 +129,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
final String MD5 = "13269d5a2e16f06fd755cc0fb9271acf";
for ( String file : Arrays.asList("CEU.exon.2010_03.sites.vcf", "CEU.exon.2010_03.sites.vcf.gz")) {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -A HomopolymerRun --variant:VCF " + validationDataLocation + "/" + file + " -BTI variant -NO_HEADER", 1,
baseTestString() + " -A HomopolymerRun --variant:VCF " + validationDataLocation + file + " -BTI variant -NO_HEADER", 1,
Arrays.asList(MD5));
executeTest("Testing lookup vcf tabix vs. vcf tribble", spec);
}