Takes a refGene table (the -B arg must be: -B refgene,AnnotatorInfoTable,/path/to/refgene_file.txt) and generates the big table of nucleotides containing annotations for each possible variant at each transcript position (e.g., 4 variants for each position).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3237 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
20379c3f82
commit
653e08c0b6
|
|
@ -23,16 +23,12 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
|||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* This plugin for {@link VariantAnnotatorEngine} serves as the core
|
||||
* of the {@link GenomicAnnotator}. It finds all records in the -B input files
|
||||
* that match the given variant's position and, optionally, its reference and alternate alleles.
|
||||
* Whether or not matching is done by reference and alternate alleles for a particular input file
|
||||
* is based solely on whether the given -B input has columns named "haplotypeReference" and
|
||||
* "haplotypeAlternate".
|
||||
* that match the given variant's position and, optionally, the variant's reference and alternate alleles.
|
||||
*
|
||||
* For details, see: http://www.broadinstitute.org/gsa/wiki/index.php/GenomicAnnotator
|
||||
*/
|
||||
public class GenomicAnnotation implements InfoFieldAnnotation {
|
||||
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ import java.util.Map;
|
|||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
|
|
@ -42,31 +43,26 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.walkers.Allows;
|
||||
import org.broadinstitute.sting.gatk.walkers.By;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.Reference;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
|
||||
|
||||
/**
|
||||
* Annotates variant calls with information from user-specified tabular files.
|
||||
*
|
||||
* For details, see: http://www.broadinstitute.org/gsa/wiki/index.php/GenomicAnnotator
|
||||
*/
|
||||
//@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=VariationRod.class))
|
||||
@Allows(value={DataSource.READS, DataSource.REFERENCE})
|
||||
@Reference(window=@Window(start=-50,stop=50))
|
||||
//@Allows(value={DataSource.READS, DataSource.REFERENCE})
|
||||
//@Reference(window=@Window(start=-50,stop=50))
|
||||
@By(DataSource.REFERENCE)
|
||||
public class GenomicAnnotator extends RodWalker<Integer, Integer> {
|
||||
@Argument(fullName="vcfOutput", shortName="vcf", doc="VCF file to which all variants should be written with annotations", required=true)
|
||||
|
|
@ -81,6 +77,18 @@ public class GenomicAnnotator extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="oneToMany", shortName="m", doc="If more than one record from the same file matches a particular locus (for example, multiple dbSNP records with the same position), create multiple entries in the ouptut VCF file - one for each match. If a particular tabular file has J matches, and another tabular file has K matches for a given locus, then J*K output VCF records will be generated - one for each pair of K, J. If this flag is not provided, the multiple records are still generated, but they are stored in the INFO field of a single output VCF record, with their annotation keys differentiated by appending '_i' with i varying from 1 to K*J. ", required=false)
|
||||
protected Boolean ONE_TO_MANY = false;
|
||||
|
||||
@Argument(fullName="join", shortName="j", doc="TODO If more than one record from the same file matches a particular locus (for example, multiple dbSNP records with the same position), create multiple entries in the ouptut VCF file - one for each match. If a particular tabular file has J matches, and another tabular file has K matches for a given locus, then J*K output VCF records will be generated - one for each pair of K, J. If this flag is not provided, the multiple records are still generated, but they are stored in the INFO field of a single output VCF record, with their annotation keys differentiated by appending '_i' with i varying from 1 to K*J. ", required=false)
|
||||
protected String[] JOIN_COLUMNS = {};
|
||||
|
||||
/*
|
||||
NOTE: there are several cases for file1.a=file2.b
|
||||
if, for a particular locus:
|
||||
only file1.0 matches, INNER-JOIN - skip file1.a OUTER JOIN - annotate with file1.a
|
||||
only file2.0 matches, INNER-JOIN - skip file2.a OUTER JOIN - annotate with file2.a
|
||||
|
||||
*/
|
||||
|
||||
|
||||
private VCFWriter vcfWriter;
|
||||
|
||||
private HashMap<String, String> nonVCFsampleName = new HashMap<String, String>();
|
||||
|
|
|
|||
Loading…
Reference in New Issue