diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index b70b00f94..d2941c637 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; @@ -40,7 +41,6 @@ import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.classloader.PackageUtils; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.CommandLineUtils; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.utils.vcf.VCFUtils; @@ -68,15 +68,20 @@ public class VariantAnnotator extends RodWalker { @Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false) protected List annotationGroupsToUse = new ArrayList(); + @Argument(fullName="expression", shortName="E", doc="One or more specific expressions to apply to variant calls; see documentation for more details", required=false) + protected List expressionsToUse = new ArrayList(); + @Argument(fullName="useAllAnnotations", shortName="all", doc="Use all possible annotations (not for the faint of heart)", required=false) protected Boolean USE_ALL_ANNOTATIONS = false; @Argument(fullName="list", shortName="ls", doc="List the available annotations and exit") protected Boolean LIST = false; + @Hidden @Argument(fullName = "assume_single_sample_reads", shortName = "single_sample", doc = "The single sample that we should assume is represented in the input bam (and therefore associate with all reads regardless of whether they have read groups)", required = false) protected String ASSUME_SINGLE_SAMPLE = null; + @Hidden @Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false) protected boolean indelsOnly = false; @@ -132,7 +137,7 @@ public class VariantAnnotator extends RodWalker { if ( USE_ALL_ANNOTATIONS ) engine = new VariantAnnotatorEngine(getToolkit()); else - engine = new VariantAnnotatorEngine(getToolkit(), annotationGroupsToUse, annotationsToUse); + engine = new VariantAnnotatorEngine(getToolkit(), annotationGroupsToUse, annotationsToUse, expressionsToUse); // setup the header fields // note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 3cded6029..df892075e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -60,6 +60,7 @@ public class VariantAnnotatorEngine { private List requestedInfoAnnotations; private List requestedGenotypeAnnotations; + private List requestedExpressions = new ArrayList(); private HashMap dbAnnotations = new HashMap(); @@ -77,17 +78,44 @@ public class VariantAnnotatorEngine { // printing out stats at the end. private Map inputTableHitCounter = new HashMap(); + private static class VAExpression { + public String fullName, bindingName, fieldName; + + public VAExpression(String fullEpression) { + int indexOfDot = fullEpression.lastIndexOf("."); + if ( indexOfDot == -1 ) + throw new UserException.BadArgumentValue(fullEpression, "it should be in rodname.value format"); + + fullName = fullEpression; + bindingName = fullEpression.substring(0, indexOfDot); + fieldName = fullEpression.substring(indexOfDot+1); + } + } // use this constructor if you want all possible annotations public VariantAnnotatorEngine(GenomeAnalysisEngine engine) { requestedInfoAnnotations = PackageUtils.getInstancesOfClassesImplementingInterface(InfoFieldAnnotation.class); requestedGenotypeAnnotations = PackageUtils.getInstancesOfClassesImplementingInterface(GenotypeAnnotation.class); - initialize(engine); + initializeDBs(engine); } - // use this constructor if you want to select specific annotations (and/or interfaces) + // use this constructor if you want to select specific annotations (and/or interfaces), but no expressions public VariantAnnotatorEngine(GenomeAnalysisEngine engine, List annotationGroupsToUse, List annotationsToUse) { + initializeAnnotations(annotationGroupsToUse, annotationsToUse); + initializeDBs(engine); + } + // use this constructor if you want to select specific annotations (and/or interfaces) plus expressions + public VariantAnnotatorEngine(GenomeAnalysisEngine engine, List annotationGroupsToUse, List annotationsToUse, List expressionsToUse) { + initializeAnnotations(annotationGroupsToUse, annotationsToUse); + initializeDBs(engine); + + // set up the expressions + for ( String expression : expressionsToUse ) + requestedExpressions.add(new VAExpression(expression)); + } + + private void initializeAnnotations(List annotationGroupsToUse, List annotationsToUse) { // create a map for all annotation classes which implement our top-level interfaces HashMap classMap = new HashMap(); for ( Class c : PackageUtils.getClassesImplementingInterface(InfoFieldAnnotation.class) ) @@ -130,11 +158,9 @@ public class VariantAnnotatorEngine { if ( GenotypeAnnotation.class.isAssignableFrom(c) ) requestedGenotypeAnnotations.add((GenotypeAnnotation)PackageUtils.getSimpleInstance(c)); } - - initialize(engine); } - private void initialize(GenomeAnalysisEngine engine) { + private void initializeDBs(GenomeAnalysisEngine engine) { // check to see whether comp rods were included List dataSources = engine.getRodDataSources(); @@ -180,6 +206,50 @@ public class VariantAnnotatorEngine { Map infoAnnotations = new LinkedHashMap(vc.getAttributes()); // annotate db occurrences + annotateDBs(tracker, ref, vc, infoAnnotations); + + // annotate expressions where available + annotateExpressions(tracker, ref, infoAnnotations); + + // process the info field + List> infoAnnotationOutputsList = new LinkedList>(); //each element in infoAnnotationOutputs corresponds to a single line in the output VCF file + infoAnnotationOutputsList.add(new LinkedHashMap(vc.getAttributes())); //keep the existing info-field annotations. After this infoAnnotationOutputsList.size() == 1, which means the output VCF file has 1 additional line. + infoAnnotationOutputsList.get(0).putAll(infoAnnotations); // put the DB membership info in + + // go through all the requested info annotationTypes + for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) + { + Map annotationsFromCurrentType = annotationType.annotate(tracker, ref, stratifiedContexts, vc); + if ( annotationsFromCurrentType == null ) { + continue; + } + + if(annotationType instanceof GenomicAnnotation) + { + infoAnnotationOutputsList = processGenomicAnnotation( infoAnnotationOutputsList, annotationsFromCurrentType ); + } + else + { + // add the annotations to each output line. + for(Map infoAnnotationOutput : infoAnnotationOutputsList) { + infoAnnotationOutput.putAll(annotationsFromCurrentType); + } + } + } + + // annotate genotypes + Map genotypes = annotateGenotypes(tracker, ref, stratifiedContexts, vc); + + // create a separate VariantContext (aka. output line) for each element in infoAnnotationOutputsList + Collection returnValue = new LinkedList(); + for(Map infoAnnotationOutput : infoAnnotationOutputsList) { + returnValue.add( new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, infoAnnotationOutput) ); + } + + return returnValue; + } + + private void annotateDBs(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc, Map infoAnnotations) { for ( Map.Entry dbSet : dbAnnotations.entrySet() ) { if ( dbSet.getKey().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) { DbSNPFeature dbsnp = DbSNPHelper.getFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); @@ -198,64 +268,43 @@ public class VariantAnnotatorEngine { infoAnnotations.put(dbSet.getValue(), overlapsComp ? true : false); } } + } - //Process the info field - List> infoAnnotationOutputsList = new LinkedList>(); //each element in infoAnnotationOutputs corresponds to a single line in the output VCF file - infoAnnotationOutputsList.add(new LinkedHashMap(vc.getAttributes())); //keep the existing info-field annotations. After this infoAnnotationOutputsList.size() == 1, which means the output VCF file has 1 additional line. - infoAnnotationOutputsList.get(0).putAll(infoAnnotations); // put the DB membership info in + private void annotateExpressions(RefMetaDataTracker tracker, ReferenceContext ref, Map infoAnnotations) { + for ( VAExpression expression : requestedExpressions ) { + Collection VCs = tracker.getVariantContexts(ref, expression.bindingName, null, ref.getLocus(), false, true); + if ( VCs.size() == 0 ) + continue; - //go through all the requested info annotationTypes - for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) - { - Map annotationsFromCurrentType = annotationType.annotate(tracker, ref, stratifiedContexts, vc); - if ( annotationsFromCurrentType == null ) { + VariantContext vc = VCs.iterator().next(); + if ( vc.hasAttribute(expression.fieldName) ) + infoAnnotations.put(expression.fullName, vc.getAttribute(expression.fieldName)); + } + } + + private Map annotateGenotypes(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + if ( requestedGenotypeAnnotations.size() == 0 ) + return vc.getGenotypes(); + + Map genotypes = new HashMap(vc.getNSamples()); + for ( Map.Entry g : vc.getGenotypes().entrySet() ) { + Genotype genotype = g.getValue(); + StratifiedAlignmentContext context = stratifiedContexts.get(g.getKey()); + if ( context == null ) { + genotypes.put(g.getKey(), genotype); continue; } - if(annotationType instanceof GenomicAnnotation) - { - infoAnnotationOutputsList = processGenomicAnnotation( infoAnnotationOutputsList, annotationsFromCurrentType ); - } - else - { - //add the annotations to each output line. - for(Map infoAnnotationOutput : infoAnnotationOutputsList) { - infoAnnotationOutput.putAll(annotationsFromCurrentType); - } + Map genotypeAnnotations = new HashMap(genotype.getAttributes()); + for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations ) { + Map result = annotation.annotate(tracker, ref, context, vc, genotype); + if ( result != null ) + genotypeAnnotations.putAll(result); } + genotypes.put(g.getKey(), new Genotype(g.getKey(), genotype.getAlleles(), genotype.getNegLog10PError(), genotype.getFilters(), genotypeAnnotations, genotype.genotypesArePhased())); } - //Process genotypes - Map genotypes; - if ( requestedGenotypeAnnotations.size() == 0 ) { - genotypes = vc.getGenotypes(); - } else { - genotypes = new HashMap(vc.getNSamples()); - for ( Map.Entry g : vc.getGenotypes().entrySet() ) { - Genotype genotype = g.getValue(); - StratifiedAlignmentContext context = stratifiedContexts.get(g.getKey()); - if ( context == null ) { - genotypes.put(g.getKey(), genotype); - continue; - } - - Map genotypeAnnotations = new HashMap(genotype.getAttributes()); - for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations ) { - Map result = annotation.annotate(tracker, ref, context, vc, genotype); - if ( result != null ) - genotypeAnnotations.putAll(result); - } - genotypes.put(g.getKey(), new Genotype(g.getKey(), genotype.getAlleles(), genotype.getNegLog10PError(), genotype.getFilters(), genotypeAnnotations, genotype.genotypesArePhased())); - } - } - - //Create a separate VariantContext (aka. output line) for each element in infoAnnotationOutputsList - Collection returnValue = new LinkedList(); - for(Map infoAnnotationOutput : infoAnnotationOutputsList) { - returnValue.add( new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, infoAnnotationOutput) ); - } - - return returnValue; + return genotypes; } // Finish processing data from GenomicAnnotation. diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index a46ed91c3..a797287d2 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -107,6 +107,14 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { executeTest("getting DB tag with HM3", spec); } + @Test + public void testUsingExpression() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString() + " -B:foo,VCF " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variant", 1, + Arrays.asList("9362ae272d5e2ff2cfe7db307a324b6d")); + executeTest("using expression", spec); + } + @Test public void testTabixAnnotations() { final String MD5 = "6c7a6a1c0027bf82656542a9b2671a35";