From 653e08c0b60b9145932e07541c204ecbd26399aa Mon Sep 17 00:00:00 2001 From: weisburd Date: Thu, 22 Apr 2010 12:11:03 +0000 Subject: [PATCH] Takes a refGene table ( -B arg must be: -B refgene,AnnotatorInfoTable,/path/to/refgene_file.txt) and generates the big table of nucleotides containing annotations for each possible variant at each transcript position (eg. 4 variants for each position). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3237 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/annotator/GenomicAnnotation.java | 10 +++----- .../walkers/annotator/GenomicAnnotator.java | 24 ++++++++++++------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotation.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotation.java index 6fdf7ad97..b54ca4eda 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotation.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotation.java @@ -23,16 +23,12 @@ import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; - - - /** * This plugin for {@link VariantAnnotatorEngine} serves as the core * of the {@link GenomicAnnotator}. It finds all records in the -B input files - * that match the given variant's position and, optionally, it's reference and alternate alleles. - * Whether or not matching is done by reference and alternate alleles for a particular input file - * based solely on whether the given -B input has columns named "haplotypeReference" and - * "haplotypeAlternate". + * that match the given variant's position and, optionally, the variant's reference and alternate alleles. + * + * For details, see: http://www.broadinstitute.org/gsa/wiki/index.php/GenomicAnnotator */ public class GenomicAnnotation implements InfoFieldAnnotation { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotator.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotator.java index 16624069c..45a0d9fc6 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotator.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotator.java @@ -35,6 +35,7 @@ import java.util.Map; import java.util.Set; import java.util.TreeSet; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; @@ -42,31 +43,26 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RodVCF; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; -import org.broadinstitute.sting.gatk.walkers.Allows; import org.broadinstitute.sting.gatk.walkers.By; import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader; import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; - /** * Annotates variant calls with information from user-specified tabular files. * * For details, see: http://www.broadinstitute.org/gsa/wiki/index.php/GenomicAnnotator */ //@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=VariationRod.class)) -@Allows(value={DataSource.READS, DataSource.REFERENCE}) -@Reference(window=@Window(start=-50,stop=50)) +//@Allows(value={DataSource.READS, DataSource.REFERENCE}) +//@Reference(window=@Window(start=-50,stop=50)) @By(DataSource.REFERENCE) public class GenomicAnnotator extends RodWalker { @Argument(fullName="vcfOutput", shortName="vcf", doc="VCF file to which all variants should be written with annotations", required=true) @@ -81,6 +77,18 @@ public class GenomicAnnotator extends RodWalker { @Argument(fullName="oneToMany", shortName="m", doc="If more than one record from the same file matches a particular locus (for example, multiple dbSNP records with the same position), create multiple entries in the ouptut VCF file - one for each match. If a particular tabular file has J matches, and another tabular file has K matches for a given locus, then J*K output VCF records will be generated - one for each pair of K, J. If this flag is not provided, the multiple records are still generated, but they are stored in the INFO field of a single output VCF record, with their annotation keys differentiated by appending '_i' with i varying from 1 to K*J. ", required=false) protected Boolean ONE_TO_MANY = false; + @Argument(fullName="join", shortName="j", doc="TODO If more than one record from the same file matches a particular locus (for example, multiple dbSNP records with the same position), create multiple entries in the ouptut VCF file - one for each match. If a particular tabular file has J matches, and another tabular file has K matches for a given locus, then J*K output VCF records will be generated - one for each pair of K, J. If this flag is not provided, the multiple records are still generated, but they are stored in the INFO field of a single output VCF record, with their annotation keys differentiated by appending '_i' with i varying from 1 to K*J. ", required=false) + protected String[] JOIN_COLUMNS = {}; + + /* + NOTE: there are several cases for file1.a=file2.b + if, for a particular locus: + only file1.0 matches, INNER-JOIN - skip file1.a OUTER JOIN - annotate with file1.a + only file2.0 matches, INNER-JOIN - skip file2.a OUTER JOIN - annotate with file2.a + + */ + + private VCFWriter vcfWriter; private HashMap nonVCFsampleName = new HashMap();