From 6e828260a019b18134ae27950f7a1faa08319af0 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 16 Aug 2011 16:13:47 -0400 Subject: [PATCH 01/15] Removed -B support. Now explodes with error if -B provided. --- .../sting/gatk/CommandLineExecutable.java | 9 ++++----- .../sting/gatk/arguments/GATKArgumentCollection.java | 10 ++-------- .../gatk/arguments/GATKArgumentCollectionUnitTest.java | 8 -------- 3 files changed, 6 insertions(+), 21 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index 32132c7ca..32002e093 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -96,24 +96,23 @@ public abstract class CommandLineExecutable extends CommandLineProgram { loadArgumentsIntoObject(walker); argumentSources.add(walker); - Collection newStyle = ListFileUtils.unpackRODBindings(parser.getRodBindings(), parser); + Collection rodBindings = ListFileUtils.unpackRODBindings(parser.getRodBindings(), parser); // todo: remove me when the old style system is removed if ( getArgumentCollection().RODBindings.size() > 0 ) { logger.warn("################################################################################"); logger.warn("################################################################################"); - logger.warn("Deprecated -B rod binding syntax detected. This syntax will be retired in GATK 1.2."); + logger.warn("Deprecated -B rod binding syntax detected. This syntax has been eliminated in GATK 1.2."); logger.warn("Please use arguments defined by each specific walker instead."); for ( String oldStyleRodBinding : getArgumentCollection().RODBindings ) { logger.warn(" -B rod binding with value " + oldStyleRodBinding + " tags: " + parser.getTags(oldStyleRodBinding).getPositionalTags()); } logger.warn("################################################################################"); logger.warn("################################################################################"); + System.exit(1); } - Collection oldStyle = ListFileUtils.unpackRODBindingsOldStyle(getArgumentCollection().RODBindings, parser); - oldStyle.addAll(newStyle); - engine.setReferenceMetaDataFiles(oldStyle); + engine.setReferenceMetaDataFiles(rodBindings); for (ReadFilter filter: filters) { loadArgumentsIntoObject(filter); diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 62135f21b..fd39d46b0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -101,6 +101,8 @@ public class GATKArgumentCollection { @Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false) public File referenceFile = null; + @Deprecated + @Hidden @ElementList(required = false) @Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form :, ", required = false) public ArrayList RODBindings = new ArrayList(); @@ -340,14 +342,6 @@ public class GATKArgumentCollection { return false; } } - if (other.RODBindings.size() != RODBindings.size()) { - return false; - } - for (int x = 0; x < RODBindings.size(); x++) { - if (!RODBindings.get(x).equals(other.RODBindings.get(x))) { - return false; - } - } if (!other.samFiles.equals(this.samFiles)) { return false; } diff --git a/public/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java index f3e868474..3a242cb13 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java @@ -88,14 +88,6 @@ public class GATKArgumentCollectionUnitTest extends BaseTest { collect.intervals.add("intervals".toLowerCase()); collect.excludeIntervals = new ArrayList(); collect.numberOfThreads = 1; - - // make some rod bindings up - ArrayList fakeBindings = new ArrayList(); - fakeBindings.add("Bind1"); - fakeBindings.add("Bind2"); - fakeBindings.add("Bind3"); - - collect.RODBindings = fakeBindings; } From 946f5c53fe1bc3f648d4571bf283d12f8e0195e9 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 16 Aug 2011 16:26:26 -0400 Subject: [PATCH 02/15] Adding docs to more walkers --- .../arguments/DbsnpArgumentCollection.java | 3 +- .../walkers/annotator/VariantAnnotator.java | 55 ++++++++++---- .../filters/VariantFiltrationWalker.java | 74 +++++++++++++++++-- .../varianteval/VariantEvalWalker.java | 55 +++++++++++++- 4 files changed, 161 insertions(+), 26 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java index ce638ff2b..2f4dd06e2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java @@ -39,8 +39,7 @@ import org.simpleframework.xml.*; public class DbsnpArgumentCollection { /** - * A dbSNP VCF file. Variants in this track will be treated as "known" variants - * in tools using this track. + * A dbSNP VCF file. */ @Input(fullName="dbsnp", shortName = "D", doc="dbSNP file", required=false) public RodBinding dbsnp; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 8c8bd19d0..96a400c68 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -49,7 +49,34 @@ import java.util.*; /** - * Annotates variant calls with context information. Users can specify which of the available annotations to use. + * Annotates variant calls with context information. + * + *

+ * VariantAnnotator is a GATK tool for annotating variant calls based on their context. + * The tool is modular; new annotations can be written easily without modifying VariantAnnotator itself. + * + *

Input

+ *

+ * A variant set to annotate and optionally one or more BAM files. + *

+ * + *

Output

+ *

+ * An annotated VCF. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T VariantAnnotator \
+ *   -I input.bam \
+ *   -o output.vcf \
+ *   -A DepthOfCoverage
+ *   --variant input.vcf \
+ *   --dbsnp dbsnp.vcf
+ * 
+ * */ @Requires(value={}) @Allows(value={DataSource.READS, DataSource.REFERENCE}) @@ -69,8 +96,6 @@ public class VariantAnnotator extends RodWalker implements Ann public RodBinding getSnpEffRodBinding() { return snpEffFile; } /** - * A dbSNP VCF file from which to annotate. - * * rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate. */ @ArgumentCollection @@ -101,15 +126,25 @@ public class VariantAnnotator extends RodWalker implements Ann @Output(doc="File to which variants should be written",required=true) protected VCFWriter vcfWriter = null; - @Argument(fullName="sampleName", shortName="sample", doc="The sample (NA-ID) corresponding to the variant input (for non-VCF input only)", required=false) - protected String sampleName = null; - + /** + * See the -list argument to view available annotations. + */ @Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false) protected List annotationsToUse = new ArrayList(); + /** + * See the -list argument to view available groups. + */ @Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false) protected List annotationGroupsToUse = new ArrayList(); + /** + * This option enables you to add annotations from one VCF to another. + * + * For example, if you want to annotate your 'variant' VCF with the AC field value from the rod bound to 'resource', + * you can specify '-E resource.AC' and records in the output VCF will be annotated with 'resource.AC=N' when a record exists in that rod at the given position. + * If multiple records in the rod overlap the given position, one is chosen arbitrarily. + */ @Argument(fullName="expression", shortName="E", doc="One or more specific expressions to apply to variant calls; see documentation for more details", required=false) protected List expressionsToUse = new ArrayList(); @@ -127,8 +162,6 @@ public class VariantAnnotator extends RodWalker implements Ann @Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false) protected boolean indelsOnly = false; - private HashMap nonVCFsampleName = new HashMap(); - private VariantAnnotatorEngine engine; private Collection indelBufferContext; @@ -164,12 +197,6 @@ public class VariantAnnotator extends RodWalker implements Ann List rodName = Arrays.asList(variantCollection.variants.getName()); Set samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName); - // add the non-VCF sample from the command-line, if applicable - if ( sampleName != null ) { - nonVCFsampleName.put(sampleName.toUpperCase(), "variant"); - samples.add(sampleName.toUpperCase()); - } - // if there are no valid samples, warn the user if ( samples.size() == 0 ) { logger.warn("There are no samples input at all; use the --sampleName argument to specify one if desired."); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index c555e88cd..bf3606b54 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -45,6 +45,34 @@ import java.util.*; /** * Filters variant calls using a number of user-selectable, parameterizable criteria. + * + *

+ * VariantFiltration is a GATK tool for hard-filtering variant calls based on certain criteria. + * Records are hard-filtered by changing the value in the FILTER field to something other than PASS. + * + *

Input

+ *

+ * A variant set to filter. + *

+ * + *

Output

+ *

+ * A filtered VCF. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T VariantFiltration \
+ *   -o output.vcf \
+ *   --variant input.vcf \
+ *   --filterExpression "AB < 0.2 || MQ0 > 50" \
+ *   --filterName "Nov09filters" \
+ *   --mask mask.vcf \
+ *   --maskName InDel
+ * 
+ * */ @Reference(window=@Window(start=-50,stop=50)) public class VariantFiltrationWalker extends RodWalker { @@ -52,33 +80,65 @@ public class VariantFiltrationWalker extends RodWalker { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); + /** + * Any variant which overlaps entries from the provided mask rod will be filtered. + */ @Input(fullName="mask", doc="Input ROD mask", required=false) public RodBinding mask; @Output(doc="File to which variants should be written", required=true) protected VCFWriter writer = null; - @Argument(fullName="filterExpression", shortName="filter", doc="One or more expression used with INFO fields to filter (see wiki docs for more info)", required=false) + /** + * VariantFiltration accepts any number of JEXL expressions (so you can have two named filters by using + * --filterName One --filterExpression "X < 1" --filterName Two --filterExpression "X > 2"). + */ + @Argument(fullName="filterExpression", shortName="filter", doc="One or more expression used with INFO fields to filter", required=false) protected ArrayList FILTER_EXPS = new ArrayList(); - @Argument(fullName="filterName", shortName="filterName", doc="Names to use for the list of filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered", required=false) + + /** + * This name is put in the FILTER field for variants that get filtered. Note that there must be a 1-to-1 mapping between filter expressions and filter names. + */ + @Argument(fullName="filterName", shortName="filterName", doc="Names to use for the list of filters", required=false) protected ArrayList FILTER_NAMES = new ArrayList(); + /** + * Similar to the INFO field based expressions, but used on the FORMAT (genotype) fields instead. + * VariantFiltration will add the sample-level FT tag to the FORMAT field of filtered samples (this does not affect the record's FILTER tag). + * One can filter normally based on most fields (e.g. "GQ < 5.0"), but the GT (genotype) field is an exception. We have put in convenience + * methods so that one can now filter out hets ("isHet == 1"), refs ("isHomRef == 1"), or homs ("isHomVar == 1"). + */ @Argument(fullName="genotypeFilterExpression", shortName="G_filter", doc="One or more expression used with FORMAT (sample/genotype-level) fields to filter (see wiki docs for more info)", required=false) protected ArrayList GENOTYPE_FILTER_EXPS = new ArrayList(); + + /** + * Similar to the INFO field based expressions, but used on the FORMAT (genotype) fields instead. + */ @Argument(fullName="genotypeFilterName", shortName="G_filterName", doc="Names to use for the list of sample/genotype filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered", required=false) protected ArrayList GENOTYPE_FILTER_NAMES = new ArrayList(); - @Argument(fullName="clusterSize", shortName="cluster", doc="The number of SNPs which make up a cluster (see also --clusterWindowSize); [default:3]", required=false) + /** + * Works together with the --clusterWindowSize argument. + */ + @Argument(fullName="clusterSize", shortName="cluster", doc="The number of SNPs which make up a cluster", required=false) protected Integer clusterSize = 3; - @Argument(fullName="clusterWindowSize", shortName="window", doc="The window size (in bases) in which to evaluate clustered SNPs (to disable the clustered SNP filter, set this value to less than 1); [default:0]", required=false) + + /** + * Works together with the --clusterSize argument. To disable the clustered SNP filter, set this value to less than 1. + */ + @Argument(fullName="clusterWindowSize", shortName="window", doc="The window size (in bases) in which to evaluate clustered SNPs", required=false) protected Integer clusterWindow = 0; - @Argument(fullName="maskExtension", shortName="maskExtend", doc="How many bases beyond records from a provided 'mask' rod should variants be filtered; [default:0]", required=false) + @Argument(fullName="maskExtension", shortName="maskExtend", doc="How many bases beyond records from a provided 'mask' rod should variants be filtered", required=false) protected Integer MASK_EXTEND = 0; - @Argument(fullName="maskName", shortName="maskName", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']", required=false) + @Argument(fullName="maskName", shortName="maskName", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call", required=false) protected String MASK_NAME = "Mask"; - @Argument(fullName="missingValuesInExpressionsShouldEvaluateAsFailing", doc="When evaluating the JEXL expressions, should missing values be considered failing the expression (by default they are considered passing)?", required=false) + /** + * By default, if JEXL cannot evaluate your expression for a particular record because one of the annotations is not present, the whole expression evaluates as PASSing. + * Use this argument to have it evaluate as failing filters instead for these cases. + */ + @Argument(fullName="missingValuesInExpressionsShouldEvaluateAsFailing", doc="When evaluating the JEXL expressions, missing values should be considered failing the expression", required=false) protected Boolean FAIL_MISSING_VALUES = false; // JEXL expressions for the filters diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 633c21320..d1fa3f4df 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -36,20 +36,61 @@ import java.util.*; /** * General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ti/Tv ratios, and a lot more) + * + *

+ * Given a variant callset, it is common to calculate various quality control metrics. These metrics include the number of + * raw or filtered SNP counts; ratio of transition mutations to transversions; concordance of a particular sample's calls + * to a genotyping chip; number of singletons per sample; etc. Furthermore, it is often useful to stratify these metrics + * by various criteria like functional class (missense, nonsense, silent), whether the site is CpG site, the amino acid + * degeneracy of the site, etc. VariantEval facilitates these calculations in two ways: by providing several built-in + * evaluation and stratification modules, and by providing a framework that permits the easy development of new evaluation + * and stratification modules. + * + *

Input

+ *

+ * One or more variant sets to evaluate plus any number of comparison sets. + *

+ * + *

Output

+ *

+ * Evaluation tables. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T VariantEval \
+ *   -o output.eval.gatkreport \
+ *   --eval:set1 set1.vcf \
+ *   --eval:set2 set2.vcf \
+ *   [--comp comp.vcf]
+ * 
+ * */ @Reference(window=@Window(start=-50, stop=50)) public class VariantEvalWalker extends RodWalker implements TreeReducible { - // Output arguments + @Output protected PrintStream out; + /** + * The variant file(s) to evaluate. + */ @Input(fullName="eval", shortName = "eval", doc="Input evaluation file(s)", required=true) public List> evals; + /** + * The variant file(s) to compare against. + */ @Input(fullName="comp", shortName = "comp", doc="Input comparison file(s)", required=false) public List> compsProvided = Collections.emptyList(); private List> comps = new ArrayList>(); + /** + * dbSNP comparison VCF. By default, the dbSNP file is used to specify the set of "known" variants. + * Other sets can be specified with the -knownName (--known_names) argument. + */ @ArgumentCollection protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); @@ -67,6 +108,9 @@ public class VariantEvalWalker extends RodWalker implements Tr @Argument(fullName="sample", shortName="sn", doc="Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context", required=false) protected Set SAMPLE_EXPRESSIONS; + /** + * List of rod tracks to be used for specifying "known" variants other than dbSNP. + */ @Argument(shortName="knownName", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false) protected HashSet KNOWN_NAMES = new HashSet(); List> knowns = new ArrayList>(); @@ -81,7 +125,9 @@ public class VariantEvalWalker extends RodWalker implements Tr @Argument(fullName="onlyVariantsOfType", shortName="VT", doc="If provided, only variants of these types will be considered during the evaluation, in ", required=false) protected Set typesToUse = null; - // Evaluator arguments + /** + * See the -list argument to view available modules. + */ @Argument(fullName="evalModule", shortName="EV", doc="One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless -noE is specified)", required=false) protected String[] MODULES_TO_USE = {}; @@ -95,7 +141,10 @@ public class VariantEvalWalker extends RodWalker implements Tr @Argument(fullName="minPhaseQuality", shortName="mpq", doc="Minimum phasing quality", required=false) protected double MIN_PHASE_QUALITY = 10.0; - @Argument(shortName="family", doc="If provided, genotypes in will be examined for mendelian violations: this argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) + /** + * This argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined. + */ + @Argument(shortName="family", doc="If provided, genotypes in will be examined for mendelian violations", required=false) protected String FAMILY_STRUCTURE; @Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false) From fadcbf68fd72d936f7b4532b57bf37ae98ea10d1 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 17 Aug 2011 09:39:33 -0400 Subject: [PATCH 03/15] Adding docs to QC walkers --- .../sting/gatk/walkers/PrintReadsWalker.java | 51 ++++++++++++++++--- .../coverage/GCContentByIntervalWalker.java | 24 ++++++++- .../gatk/walkers/qc/CountLociWalker.java | 24 +++++++++ .../gatk/walkers/qc/CountPairsWalker.java | 20 ++++++++ .../gatk/walkers/qc/CountReadsWalker.java | 24 +++++++++ .../sting/gatk/walkers/qc/CountRodWalker.java | 42 ++++++++++++--- 6 files changed, 169 insertions(+), 16 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index 7e1dcd707..fdfac6bf7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -40,26 +40,65 @@ import java.util.TreeSet; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; + /** - * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear - * in the input file. It can dynamically merge the contents of multiple input BAM files, resulting - * in merged output sorted in coordinate order. Can also optionally filter reads based on the --read-filter - * command line argument. + * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file. + * + *

+ * PrintReads can dynamically merge the contents of multiple input BAM files, resulting + * in merged output sorted in coordinate order. Can also optionally filter reads based on the + * --read_filter command line argument. + * + *

Input

+ *

+ * One or more bam files. + *

+ * + *

Output

+ *

+ * A single processed bam file. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T PrintReads \
+ *   -o output.bam \
+ *   -I input1.bam \
+ *   -I input2.bam \
+ *   --read_filter MappingQualityZero
+ * 
+ * */ @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT) @Requires({DataSource.READS, DataSource.REFERENCE}) public class PrintReadsWalker extends ReadWalker { - /** an optional argument to dump the reads out to a BAM file */ + @Output(doc="Write output to this BAM filename instead of STDOUT") SAMFileWriter out; + @Argument(fullName = "readGroup", shortName = "readGroup", doc="Exclude all reads with this read group from the output", required = false) String readGroup = null; + + /** + * For example, --platform ILLUMINA or --platform 454. + */ @Argument(fullName = "platform", shortName = "platform", doc="Exclude all reads with this platform from the output", required = false) - String platform = null; // E.g. ILLUMINA, 454 + String platform = null; + @Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false) int nReadsToPrint = -1; + + /** + * Only reads from samples listed in the provided file(s) will be included in the output. + */ @Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line). Can be specified multiple times", required=false) public Set sampleFile = new TreeSet(); + + /** + * Only reads from the sample(s) will be included in the output. + */ @Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false) public Set sampleNames = new TreeSet(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByIntervalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByIntervalWalker.java index a4944e939..5c2a967b9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByIntervalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByIntervalWalker.java @@ -38,12 +38,32 @@ import java.util.List; /** * Walks along reference and calculates the GC content for each interval. + * + * + *

Input

+ *

+ * One or more BAM files. + *

+ * + *

Output

+ *

+ * GC content calculations per interval. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T GCContentByInterval \
+ *   -o output.txt \
+ *   -I input.bam \
+ *   -L input.intervals
+ * 
+ * */ @Allows(value = {DataSource.REFERENCE}) @Requires(value = {DataSource.REFERENCE}) - @By(DataSource.REFERENCE) - public class GCContentByIntervalWalker extends LocusWalker { @Output protected PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java index 0d68c8493..09113704a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLociWalker.java @@ -11,7 +11,31 @@ import java.io.PrintStream; /** * Walks over the input data set, calculating the total number of covered loci for diagnostic purposes. + * + *

* Simplest example of a locus walker. + * + * + *

Input

+ *

+ * One or more BAM files. + *

+ * + *

Output

+ *

+ * Number of loci traversed. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T CountLoci \
+ *   -o output.txt \
+ *   -I input.bam \
+ *   [-L input.intervals]
+ * 
+ * */ public class CountLociWalker extends LocusWalker implements TreeReducible { @Output(doc="Write count to this file instead of STDOUT") diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java index 26fa9a258..e770418c1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountPairsWalker.java @@ -39,6 +39,26 @@ import java.util.List; * query name order. Breaks counts down by total pairs and number * of paired reads. * + * + *

Input

+ *

+ * One or more bam files. + *

+ * + *

Output

+ *

+ * Number of pairs seen. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T CountPairs \
+ *   -o output.txt \
+ *   -I input.bam
+ * 
+ * * @author mhanna */ public class CountPairsWalker extends ReadPairWalker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java index 87c0409b9..9ce9c4eec 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java @@ -9,8 +9,32 @@ import org.broadinstitute.sting.gatk.walkers.Requires; /** * Walks over the input data set, calculating the number of reads seen for diagnostic purposes. + * + *

* Can also count the number of reads matching a given criterion using read filters (see the * --read-filter command line argument). Simplest example of a read-backed analysis. + * + * + *

Input

+ *

+ * One or more BAM files. + *

+ * + *

Output

+ *

+ * Number of reads seen. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T CountReads \
+ *   -o output.txt \
+ *   -I input.bam \
+ *   [-L input.intervals]
+ * 
+ * */ @Requires({DataSource.READS, DataSource.REFERENCE}) public class CountReadsWalker extends ReadWalker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java index 8a03dea44..04d04c2c4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java @@ -27,8 +27,11 @@ package org.broadinstitute.sting.gatk.walkers.qc; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; +import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -41,23 +44,46 @@ import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.collections.Pair; import java.io.PrintStream; -import java.util.ArrayList; -import java.util.Collection; -import java.util.LinkedList; -import java.util.List; +import java.util.*; /** - * Prints out counts of the number of reference ordered data objects are - * each locus for debugging RodWalkers. + * Prints out counts of the number of reference ordered data objects encountered. + * + * + *

Input

+ *

+ * One or more rod files. + *

+ * + *

Output

+ *

+ * Number of rods seen. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T CountRod \
+ *   -o output.txt \
+ *   --rod input.vcf
+ * 
+ * */ public class CountRodWalker extends RodWalker, Long>> implements TreeReducible, Long>> { @Output public PrintStream out; - @Argument(fullName = "verbose", shortName = "v", doc="If true, Countrod will print out detailed information about the rods it finds and locations", required = false) + /** + * One or more input rod files + */ + @Input(fullName="rod", shortName = "rod", doc="Input VCF file(s)", required=false) + public List> rods = Collections.emptyList(); + + @Argument(fullName = "verbose", shortName = "v", doc="If true, CountRod will print out detailed information about the rods it finds and locations", required = false) public boolean verbose = false; - @Argument(fullName = "showSkipped", shortName = "s", doc="If true, CountRod will print out the skippped locations", required = false) + @Argument(fullName = "showSkipped", shortName = "s", doc="If true, CountRod will print out the skipped locations", required = false) public boolean showSkipped = false; @Override From b3b5d608caf8b0db652111a4581fc3c609cce277 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 17 Aug 2011 09:57:19 -0400 Subject: [PATCH 04/15] Adding docs to yet more walkers --- .../fasta/FastaAlternateReferenceWalker.java | 36 ++++++++++++++++-- .../walkers/fasta/FastaReferenceWalker.java | 38 +++++++++++++++++-- .../walkers/variantutils/VariantsToVCF.java | 35 ++++++++++++++++- 3 files changed, 101 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java index a57fabc39..8f333a2b3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java @@ -40,18 +40,48 @@ import java.util.List; /** - * Generates an alternative reference sequence over the specified interval. Given variant ROD tracks, - * it replaces the reference bases at variation sites with the bases supplied by the ROD(s). Additionally, - * allows for a "snpmask" ROD to set overlapping bases to 'N'. + * Generates an alternative reference sequence over the specified interval. + * + *

+ * Given variant ROD tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s). + * Additionally, allows for a "snpmask" ROD to set overlapping bases to 'N'. + * + *

Input

+ *

+ * The reference, requested intervals, and any number of variant rod files. + *

+ * + *

Output

+ *

+ * A fasta file representing the requested intervals. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T FastaAlternateReferenceMaker \
+ *   -o output.fasta \
+ *   -L input.intervals \
+ *   --variant input.vcf \
+ *   [--snpmask mask.vcf]
+ * 
+ * */ @WalkerName("FastaAlternateReferenceMaker") @Reference(window=@Window(start=-1,stop=50)) @Requires(value={DataSource.REFERENCE}) public class FastaAlternateReferenceWalker extends FastaReferenceWalker { + /** + * Variants from these input files are used by this tool to construct an alternate reference. + */ @Input(fullName = "variant", shortName = "V", doc="variants to model", required=false) public List> variants = Collections.emptyList(); + /** + * Snps from this file are used as a mask when constructing the alternate reference. + */ @Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=false) public RodBinding snpmask; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java index 2dbfc76ff..5f3b37cc8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java @@ -38,14 +38,44 @@ import org.broadinstitute.sting.utils.collections.Pair; import java.io.PrintStream; /** - * Renders a new reference in FASTA format consisting of only those loci provided in the input data set. Has optional - * features to control the output format. + * Renders a new reference in FASTA format consisting of only those loci provided in the input data set. + * + *

+ * The output format can be partially controlled using the provided command-line arguments. + * + *

Input

+ *

+ * The reference and requested intervals. + *

+ * + *

Output

+ *

+ * A fasta file representing the requested intervals. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T FastaReference \
+ *   -o output.fasta \
+ *   -L input.intervals
+ * 
+ * */ @WalkerName("FastaReferenceMaker") public class FastaReferenceWalker extends RefWalker, GenomeLoc> { + @Output PrintStream out; - @Argument(fullName="lineWidth", shortName="lw", doc="Maximum length of sequence to write per line", required=false) public int fastaLineWidth=60; - @Argument(fullName="rawOnelineSeq", shortName="raw", doc="Print sequences with no FASTA header lines, one line per interval (i.e. lineWidth = infinity) - CAUTION: adjacent intervals will automatically be merged", required=false) public boolean fastaRawSeqs=false; + + @Argument(fullName="lineWidth", shortName="lw", doc="Maximum length of sequence to write per line", required=false) + public int fastaLineWidth=60; + + /** + * Please note that when using this argument adjacent intervals will automatically be merged. + */ + @Argument(fullName="rawOnelineSeq", shortName="raw", doc="Print sequences with no FASTA header lines, one line per interval (i.e. lineWidth = infinity)", required=false) + public boolean fastaRawSeqs=false; protected FastaSequence fasta; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 1684dccfb..61851abe2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -53,6 +53,30 @@ import java.util.*; /** * Converts variants from other file formats to VCF format. + * + *

+ * Note that there must be a Tribble feature/codec for the file format as well as an adaptor. + * + *

Input

+ *

+ * A variant file to filter. + *

+ * + *

Output

+ *

+ * A VCF file. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T VariantsToVCF \
+ *   -o output.vcf \
+ *   --variant:RawHapMap input.hapmap \
+ *   --dbsnp dbsnp.vcf
+ * 
+ * */ @Reference(window=@Window(start=-40,stop=40)) public class VariantsToVCF extends RodWalker { @@ -61,15 +85,24 @@ public class VariantsToVCF extends RodWalker { protected VCFWriter baseWriter = null; private SortingVCFWriter vcfwriter; // needed because hapmap/dbsnp indel records move + /** + * Variants from this input file are used by this tool as input. + */ @Input(fullName="variant", shortName = "V", doc="Input variant file", required=true) public RodBinding variants; @ArgumentCollection protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); - @Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod (for data like GELI with genotypes)", required=false) + /** + * This argument is used for data (like GELI) with genotypes but no sample names encoded within. + */ + @Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod", required=false) protected String sampleName = null; + /** + * This argument is useful for fixing input VCFs with bad reference bases (the output will be a fixed version of the VCF). + */ @Argument(fullName="fixRef", shortName="fixRef", doc="Fix common reference base in case there's an indel without padding", required=false) protected boolean fixReferenceBase = false; From 79dcfca25fdedd3aea29e36a9e8a2736c601d717 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 17 Aug 2011 11:56:51 -0400 Subject: [PATCH 06/15] Fixed bad character in documentation --- .../sting/gatk/walkers/indels/IndelRealigner.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 029b6deaf..d766ae8bd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -69,7 +69,7 @@ import java.util.*; *

* The local realignment tool is designed to consume one or more BAM files and to locally realign reads such that the number of mismatching bases * is minimized across all the reads. In general, a large percent of regions requiring local realignment are due to the presence of an insertion - * or deletion (indels) in the individualÕs genome with respect to the reference genome. Such alignment artifacts result in many bases mismatching + * or deletion (indels) in the individual's genome with respect to the reference genome. Such alignment artifacts result in many bases mismatching * the reference near the misalignment, which are easily mistaken as SNPs. Moreover, since read mapping algorithms operate on each read independently, * it is impossible to place reads on the reference genome such at mismatches are minimized across all reads. Consequently, even when some reads are * correctly mapped with indels, reads covering the indel near just the start or end of the read are often incorrectly mapped with respect the true indel, From 78deb3f19595816d08fbde35db5f62189727048e Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 17 Aug 2011 11:57:00 -0400 Subject: [PATCH 07/15] Fixed bad character in documentation --- .../walkers/variantutils/VariantsToTable.java | 226 ++++++++++++------ 1 file changed, 153 insertions(+), 73 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index af3593ce4..51515b2d3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -40,29 +40,97 @@ import java.io.PrintStream; import java.util.*; /** - * Emits specific fields as dictated by the user from one or more VCF files. + * Emits specific fields from a VCF file to a table-deliminated format + * + *

+ * This walker accepts a single VCF file and writes out user-selected fields from the + * VCF as a header-containing, tab-deliminated file. The user specifies one or more + * fields to print with the -F NAME, each of which appears as a single column in + * the output file, with a header named NAME, and the value of this field in the VCF + * one per line. NAME can be any standard VCF column (CHROM, ID, QUAL) or any binding + * in the INFO field (AC=10). Note that this tool does not support capturing any + * GENOTYPE field values. If a VCF record is missing a value, then the tool by + * default throws an error, but the special value NA can be emitted instead with + * appropriate tool arguments. + * + *

+ * + *

Input

+ *

+ *

    + *
  • A VCF file
  • + *
  • A list of -F fields to write
  • + *
+ *

+ * + *

Output

+ *

+ * A table deliminated file containing the values of the requested fields in the VCF file + *

+ * + *

Examples

+ *
+ *     -T $WalkerName \
+ *     -V file.vcf \
+ *     -F CHROM -F POS -F ID -F QUAL -F AC \
+ *     -o results.table
+ *
+ *     would produce a file that looks like:
+ *
+ *     CHROM    POS ID      QUAL    AC
+ *     1        10  .       50      1
+ *     1        20  rs10    99      10
+ *     et cetera...
+ * 
+ * + * @author Mark DePristo + * @since 2010 */ public class VariantsToTable extends RodWalker { - @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @Output(doc="File to which results should be written",required=true) protected PrintStream out; - @Argument(fullName="fields", shortName="F", doc="Fields to emit from the VCF, allows any VCF field, any info field, and some meta fields like nHets", required=true) - public ArrayList fieldsToTake = new ArrayList(); + /** + * -F NAME can be any standard VCF column (CHROM, ID, QUAL) or any binding in the INFO field (e.g., AC=10). + * Note that this tool does not support capturing any GENOTYPE field values. Note this argument + * accepts any number of inputs. So -F CHROM -F POS is allowed. + */ + @Argument(fullName="fields", shortName="F", doc="The name of each field to capture for output in the table", required=true) + public List fieldsToTake = new ArrayList(); - @Argument(fullName="showFiltered", shortName="raw", doc="Include filtered records") + /** + * By default this tool only emits values for fields where the FILTER field is either PASS or . (unfiltered). + * Throwing this flag will cause $WalkerName to emit values regardless of the FILTER field value. + */ + @Argument(fullName="showFiltered", shortName="raw", doc="If provided, field values from filtered records will be included in the output", required=false) public boolean showFiltered = false; - @Argument(fullName="maxRecords", shortName="M", doc="Maximum number of records to emit, if provided", required=false) + /** + * If provided, then this tool will exit with success after this number of records have been emitted to the file. + */ + @Argument(fullName="maxRecords", shortName="M", doc="If provided, we will emit at most maxRecord records to the table", required=false) public int MAX_RECORDS = -1; int nRecords = 0; + /** + * By default, only biallelic (REF=A, ALT=B) sites are including in the output. If this flag is provided, then + * VariantsToTable will emit field values for records with multiple ALT alleles. Note that in general this + * can make your resulting file unreadable and malformated according to tools like R, as the representation of + * multi-allelic INFO field values can be lists of values. + */ @Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false) public boolean keepMultiAllelic = false; + /** + * By default, this tool throws a UserException when it encounters a field without a value in some record. This + * is generally useful when you mistype -F CHRMO, so that you get a friendly warning about CHRMO not being + * found before the tool runs through 40M 1000G records. However, in some cases you genuinely want to allow such + * fields (e.g., AC not being calculated for filtered records, if included). When provided, this argument + * will cause VariantsToTable to write out NA values for missing fields instead of throwing an error. + */ @Argument(fullName="allowMissingData", shortName="AMD", doc="If provided, we will not require every record to contain every field", required=false) public boolean ALLOW_MISSING_DATA = false; @@ -70,65 +138,6 @@ public class VariantsToTable extends RodWalker { out.println(Utils.join("\t", fieldsToTake)); } - public static abstract class Getter { public abstract String get(VariantContext vc); } - public static Map getters = new HashMap(); - - static { - // #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT - getters.put("CHROM", new Getter() { public String get(VariantContext vc) { return vc.getChr(); } }); - getters.put("POS", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getStart()); } }); - getters.put("REF", new Getter() { - public String get(VariantContext vc) { - String x = ""; - if ( vc.hasReferenceBaseForIndel() ) { - Byte refByte = vc.getReferenceBaseForIndel(); - x=x+new String(new byte[]{refByte}); - } - return x+vc.getReference().getDisplayString(); - } - }); - getters.put("ALT", new Getter() { - public String get(VariantContext vc) { - StringBuilder x = new StringBuilder(); - int n = vc.getAlternateAlleles().size(); - if ( n == 0 ) return "."; - if ( vc.hasReferenceBaseForIndel() ) { - Byte refByte = vc.getReferenceBaseForIndel(); - x.append(new String(new byte[]{refByte})); - } - - for ( int i = 0; i < n; i++ ) { - if ( i != 0 ) x.append(","); - x.append(vc.getAlternateAllele(i).getDisplayString()); - } - return x.toString(); - } - }); - getters.put("QUAL", new Getter() { public String get(VariantContext vc) { return Double.toString(vc.getPhredScaledQual()); } }); - getters.put("TRANSITION", new Getter() { public String get(VariantContext vc) { - if ( vc.isSNP() && vc.isBiallelic() ) - return VariantContextUtils.isTransition(vc) ? "1" : "0"; - else - return "-1"; - }}); - getters.put("FILTER", new Getter() { public String get(VariantContext vc) { - return vc.isNotFiltered() ? "PASS" : Utils.join(",", vc.getFilters()); } - }); - - getters.put("HET", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount()); } }); - getters.put("HOM-REF", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomRefCount()); } }); - getters.put("HOM-VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomVarCount()); } }); - getters.put("NO-CALL", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNoCallCount()); } }); - getters.put("VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount() + vc.getHomVarCount()); } }); - getters.put("NSAMPLES", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples()); } }); - getters.put("NCALLED", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples() - vc.getNoCallCount()); } }); - getters.put("GQ", new Getter() { public String get(VariantContext vc) { - if ( vc.getNSamples() > 1 ) throw new UserException("Cannot get GQ values for multi-sample VCF"); - return String.format("%.2f", 10 * vc.getGenotype(0).getNegLog10PError()); - }}); - } - - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( tracker == null ) // RodWalkers can make funky map calls return 0; @@ -155,6 +164,15 @@ public class VariantsToTable extends RodWalker { return s.endsWith("*"); } + /** + * Utility function that returns the list of values for each field in fields from vc. + * + * @param vc the VariantContext whose field values we can to capture + * @param fields a non-null list of fields to capture from VC + * @param allowMissingData if false, then throws a UserException if any field isn't found in vc. Otherwise + * provides a value of NA + * @return + */ public static List extractFields(VariantContext vc, List fields, boolean allowMissingData) { List vals = new ArrayList(); @@ -213,13 +231,75 @@ public class VariantsToTable extends RodWalker { return vals; } - public Integer reduceInit() { - return 0; - } - - public Integer reduce(Integer counter, Integer sum) { - return counter + sum; - } - + // + // default reduce -- doesn't do anything at all + // + public Integer reduceInit() { return 0; } + public Integer reduce(Integer counter, Integer sum) { return counter + sum; } public void onTraversalDone(Integer sum) {} + + // ---------------------------------------------------------------------------------------------------- + // + // static system for getting values from VC by name. + // + // ---------------------------------------------------------------------------------------------------- + + public static abstract class Getter { public abstract String get(VariantContext vc); } + public static Map getters = new HashMap(); + + static { + // #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT + getters.put("CHROM", new Getter() { public String get(VariantContext vc) { return vc.getChr(); } }); + getters.put("POS", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getStart()); } }); + getters.put("REF", new Getter() { + public String get(VariantContext vc) { + String x = ""; + if ( vc.hasReferenceBaseForIndel() ) { + Byte refByte = vc.getReferenceBaseForIndel(); + x=x+new String(new byte[]{refByte}); + } + return x+vc.getReference().getDisplayString(); + } + }); + getters.put("ALT", new Getter() { + public String get(VariantContext vc) { + StringBuilder x = new StringBuilder(); + int n = vc.getAlternateAlleles().size(); + if ( n == 0 ) return "."; + if ( vc.hasReferenceBaseForIndel() ) { + Byte refByte = vc.getReferenceBaseForIndel(); + x.append(new String(new byte[]{refByte})); + } + + for ( int i = 0; i < n; i++ ) { + if ( i != 0 ) x.append(","); + x.append(vc.getAlternateAllele(i).getDisplayString()); + } + return x.toString(); + } + }); + getters.put("QUAL", new Getter() { public String get(VariantContext vc) { return Double.toString(vc.getPhredScaledQual()); } }); + getters.put("TRANSITION", new Getter() { public String get(VariantContext vc) { + if ( vc.isSNP() && vc.isBiallelic() ) + return VariantContextUtils.isTransition(vc) ? "1" : "0"; + else + return "-1"; + }}); + getters.put("FILTER", new Getter() { public String get(VariantContext vc) { + return vc.isNotFiltered() ? "PASS" : Utils.join(",", vc.getFilters()); } + }); + + getters.put("HET", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount()); } }); + getters.put("HOM-REF", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomRefCount()); } }); + getters.put("HOM-VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomVarCount()); } }); + getters.put("NO-CALL", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNoCallCount()); } }); + getters.put("VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount() + vc.getHomVarCount()); } }); + getters.put("NSAMPLES", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples()); } }); + getters.put("NCALLED", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples() - vc.getNoCallCount()); } }); + getters.put("GQ", new Getter() { public String get(VariantContext vc) { + if ( vc.getNSamples() > 1 ) throw new UserException("Cannot get GQ values for multi-sample VCF"); + return String.format("%.2f", 10 * vc.getGenotype(0).getNegLog10PError()); + }}); + } + } From 9d1d5bd27a5747a8dfd430ec042dee76e7fce835 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 17 Aug 2011 11:57:31 -0400 Subject: [PATCH 08/15] Revert "Fixed bad character in documentation" This reverts commit a1f50c82d3cb25e5e83d36e9054d74cdee957d87. --- .../sting/gatk/walkers/indels/IndelRealigner.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index d766ae8bd..029b6deaf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -69,7 +69,7 @@ import java.util.*; *

* The local realignment tool is designed to consume one or more BAM files and to locally realign reads such that the number of mismatching bases * is minimized across all the reads. In general, a large percent of regions requiring local realignment are due to the presence of an insertion - * or deletion (indels) in the individual's genome with respect to the reference genome. Such alignment artifacts result in many bases mismatching + * or deletion (indels) in the individualÕs genome with respect to the reference genome. Such alignment artifacts result in many bases mismatching * the reference near the misalignment, which are easily mistaken as SNPs. Moreover, since read mapping algorithms operate on each read independently, * it is impossible to place reads on the reference genome such at mismatches are minimized across all reads. Consequently, even when some reads are * correctly mapped with indels, reads covering the indel near just the start or end of the read are often incorrectly mapped with respect the true indel, From 5f794d16a7df2aab7e761adbe9d75e3dbdfc1dae Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 17 Aug 2011 12:00:32 -0400 Subject: [PATCH 09/15] Fixed bad character in documentation --- .../sting/gatk/walkers/indels/IndelRealigner.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 029b6deaf..d766ae8bd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -69,7 +69,7 @@ import java.util.*; *

* The local realignment tool is designed to consume one or more BAM files and to locally realign reads such that the number of mismatching bases * is minimized across all the reads. In general, a large percent of regions requiring local realignment are due to the presence of an insertion - * or deletion (indels) in the individualÕs genome with respect to the reference genome. Such alignment artifacts result in many bases mismatching + * or deletion (indels) in the individual's genome with respect to the reference genome. Such alignment artifacts result in many bases mismatching * the reference near the misalignment, which are easily mistaken as SNPs. Moreover, since read mapping algorithms operate on each read independently, * it is impossible to place reads on the reference genome such at mismatches are minimized across all reads. Consequently, even when some reads are * correctly mapped with indels, reads covering the indel near just the start or end of the read are often incorrectly mapped with respect the true indel, From c6fb215faf42d04ba86341b0351613d7ef717ad3 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 17 Aug 2011 12:02:41 -0400 Subject: [PATCH 10/15] GATKDocs for VariantsToTable -- Made a previously required argument optional, as this was a long-standing bug --- .../sting/gatk/walkers/variantutils/VariantsToTable.java | 1 + 1 file changed, 1 insertion(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index 51515b2d3..a37cfa6ba 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -135,6 +135,7 @@ public class VariantsToTable extends RodWalker { public boolean ALLOW_MISSING_DATA = false; public void initialize() { + // print out the header out.println(Utils.join("\t", fieldsToTake)); } From 3da71a9bb6ed5384641ea98e36fd3affca87ac8a Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 17 Aug 2011 12:04:45 -0400 Subject: [PATCH 11/15] Clean up summary --- .../sting/gatk/walkers/variantutils/VariantsToTable.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index a37cfa6ba..b3fd540bc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -40,7 +40,7 @@ import java.io.PrintStream; import java.util.*; /** - * Emits specific fields from a VCF file to a table-deliminated format + * Emits specific fields from a VCF file to a tab-deliminated table * *

* This walker accepts a single VCF file and writes out user-selected fields from the From d1bb302d12144991650f90f9ffc57bac5747c005 Mon Sep 17 00:00:00 2001 From: Menachem Fromer Date: Wed, 17 Aug 2011 12:21:37 -0400 Subject: [PATCH 12/15] Added GatkDocs documentation --- .../phasing/ReadBackedPhasingWalker.java | 53 +++++++++++++++---- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index ac4fba4b4..34c7912d9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -23,7 +23,10 @@ */ package org.broadinstitute.sting.gatk.walkers.phasing; -import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.ArgumentCollection; +import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -49,16 +52,46 @@ import java.util.*; import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFromRods; - /** * Walks along all variant ROD loci, caching a user-defined window of VariantContext sites, and then finishes phasing them when they go out of range (using upstream and downstream reads). + * + *

+ * Performs physical phasing of SNP calls, based on sequencing reads. + *

+ * + *

Input

+ *

+ * VCF file of SNP calls, BAM file of sequence reads. + *

+ * + *

Output

+ *

+ * Phased VCF file. + *

+ * + *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T ReadBackedPhasing
+ *      -R reference.fasta
+ *      -I reads.bam
+ *      --variant:vcf SNPs.vcf
+ *      -BTI variant
+ *      -BTIMR INTERSECTION
+ *      -o phased_SNPs.vcf
+ *      --phaseQualityThresh 20.0
+ * 
+ * + * @author Menachem Fromer + * @since July 2010 */ @Allows(value = {DataSource.READS, DataSource.REFERENCE}) @Requires(value = {DataSource.READS, DataSource.REFERENCE}) @By(DataSource.READS) -@ReadFilters({MappingQualityZeroReadFilter.class}) // Filter out all reads with zero mapping quality +@ReadFilters({MappingQualityZeroReadFilter.class}) public class ReadBackedPhasingWalker extends RodWalker { private static final boolean DEBUG = false; @@ -73,13 +106,13 @@ public class ReadBackedPhasingWalker extends RodWalker P(error) = 10^(-10/10) = 0.1, P(correct) = 0.9 @Hidden @@ -87,10 +120,10 @@ public class ReadBackedPhasingWalker extends RodWalker Date: Wed, 17 Aug 2011 12:35:08 -0400 Subject: [PATCH 13/15] Adding docs to 3 more walkers --- .../gatk/walkers/indels/LeftAlignIndels.java | 36 +++++++++++++++++-- .../variantutils/LeftAlignVariants.java | 25 +++++++++++++ .../variantutils/ValidateVariants.java | 36 ++++++++++++++++--- 3 files changed, 89 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java index af8051334..17d5a8e9b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java @@ -35,16 +35,46 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.sam.AlignmentUtils; + /** - * Left aligns indels in reads. + * Left-aligns indels from reads in a bam file. + * + *

+ * LeftAlignIndels is a tool that takes a bam file and left-aligns any indels inside it. The same indel can often be + * placed at multiple positions and still represent the same haplotype. While a standard convention is to place an + * indel at the left-most position this doesn't always happen, so this tool can be used to left-align them. + * + *

Input

+ *

+ * A bam file to left-align. + *

+ * + *

Output

+ *

+ * A left-aligned bam. + *

+ * + *

Examples

+ *
+ * java -Xmx3g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T LeftAlignIndels \
+ *   -I input.bam \
+ *   -o output.vcf
+ * 
+ * */ public class LeftAlignIndels extends ReadWalker { @Output(required=false, doc="Output bam") protected StingSAMFileWriter writer = null; - @Argument(fullName="maxReadsInRam", shortName="maxInRam", doc="max reads allowed to be kept in memory at a time by the SAMFileWriter. "+ - "If too low, the tool may run out of system file descriptors needed to perform sorting; if too high, the tool may run out of memory.", required=false) + /** + * If set too low, the tool may run out of system file descriptors needed to perform sorting; if too high, the tool + * may run out of memory. We recommend that you additionally tell Java to use a temp directory with plenty of available + * space (by setting java.io.tempdir on the command-line). + */ + @Argument(fullName="maxReadsInRam", shortName="maxInRam", doc="max reads allowed to be kept in memory at a time by the output writer", required=false) protected int MAX_RECORDS_IN_RAM = 500000; public void initialize() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index c47a015c6..9fae71e4e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -46,6 +46,31 @@ import java.util.*; /** * Left-aligns indels from a variants file. + * + *

+ * LeftAlignVariants is a tool that takes a VCF file and left-aligns any indels inside it. The same indel can often be + * placed at multiple positions and still represent the same haplotype. While the standard convention with VCF is to + * place an indel at the left-most position this doesn't always happen, so this tool can be used to left-align them. + * + *

Input

+ *

+ * A variant set to left-align. + *

+ * + *

Output

+ *

+ * A left-aligned VCF. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T LeftAlignVariants \
+ *   --variant input.vcf \
+ *   -o output.vcf
+ * 
+ * */ @Reference(window=@Window(start=-200,stop=200)) public class LeftAlignVariants extends RodWalker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 5c7fb268c..01a6e2f70 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broad.tribble.Feature; import org.broad.tribble.TribbleException; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.commandline.*; @@ -34,7 +33,6 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Allele; @@ -48,7 +46,32 @@ import java.util.Set; /** - * Validates a variants file. + * Strictly validates a variants file. + * + *

+ * ValidateVariants is a GATK tool that takes a VCF file and validates much of the information inside it. + * Checks include the correctness of the reference base(s), accuracy of AC & AN values, tests against rsIDs + * when a dbSNP file is provided, and that all alternate alleles are present in at least one sample. + * + *

Input

+ *

+ * A variant set to filter. + *

+ * + *

Output

+ *

+ * A filtered VCF. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T ValidateVariants \
+ *   --variant input.vcf \
+ *   --dbsnp dbsnp.vcf
+ * 
+ * */ @Reference(window=@Window(start=0,stop=100)) public class ValidateVariants extends RodWalker { @@ -67,10 +90,13 @@ public class ValidateVariants extends RodWalker { @Argument(fullName = "validationType", shortName = "type", doc = "which validation type to run", required = false) protected ValidationType type = ValidationType.ALL; - @Argument(fullName = "doNotValidateFilteredRecords", shortName = "doNotValidateFilteredRecords", doc = "should we skip validation on filtered records?", required = false) + /** + * By default, even filtered records are validated. + */ + @Argument(fullName = "doNotValidateFilteredRecords", shortName = "doNotValidateFilteredRecords", doc = "skip validation on filtered records", required = false) protected Boolean DO_NOT_VALIDATE_FILTERED = false; - @Argument(fullName = "warnOnErrors", shortName = "warnOnErrors", doc = "should we just emit warnings on errors instead of terminating the run?", required = false) + @Argument(fullName = "warnOnErrors", shortName = "warnOnErrors", doc = "just emit warnings on errors instead of terminating the run at the first instance", required = false) protected Boolean WARN_ON_ERROR = false; private long numErrors = 0; From 6d629c176c2842414fd093546f68fdaf4c1b1c3f Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 17 Aug 2011 13:27:36 -0400 Subject: [PATCH 14/15] Adding docs --- ...fWalker.java => CountRODsByRefWalker.java} | 0 ...untRodWalker.java => CountRODsWalker.java} | 0 .../VariantValidationAssessor.java | 62 ++++++++++++++----- 3 files changed, 48 insertions(+), 14 deletions(-) rename public/java/src/org/broadinstitute/sting/gatk/walkers/qc/{CountRodByRefWalker.java => CountRODsByRefWalker.java} (100%) rename public/java/src/org/broadinstitute/sting/gatk/walkers/qc/{CountRodWalker.java => CountRODsWalker.java} (100%) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodByRefWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRefWalker.java similarity index 100% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodByRefWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRefWalker.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsWalker.java similarity index 100% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsWalker.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java index 6ed0bbd16..b98646270 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java @@ -25,10 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -43,21 +41,57 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; /** - * Converts Sequenom files to a VCF annotated with QC metrics (HW-equilibrium, % failed probes) + * Annotates a validation (from e.g. Sequenom) VCF with QC metrics (HW-equilibrium, % failed probes) + * + *

+ * The Variant Validation Assessor is a tool for vetting/assessing validation data (containing genotypes). + * The tool produces a VCF that is annotated with information pertaining to plate quality control and by + * default is soft-filtered by high no-call rate or low Hardy-Weinberg probability. + * If you have .ped files, please first convert them to VCF format + * (see http://www.broadinstitute.org/gsa/wiki/index.php/Converting_ped_to_vcf). + * + *

Input

+ *

+ * A validation VCF to annotate. + *

+ * + *

Output

+ *

+ * An annotated VCF. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T VariantValidationAssessor \
+ *   --variant input.vcf \
+ *   -o output.vcf
+ * 
+ * */ @Reference(window=@Window(start=0,stop=40)) public class VariantValidationAssessor extends RodWalker { - @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) - public RodBinding variants; + + @ArgumentCollection + protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @Output(doc="File to which variants should be written",required=true) protected VCFWriter vcfwriter = null; - @Argument(fullName="maxHardy", doc="Maximum phred-scaled Hardy-Weinberg violation pvalue to consider an assay valid [default:20]", required=false) + @Argument(fullName="maxHardy", doc="Maximum phred-scaled Hardy-Weinberg violation pvalue to consider an assay valid", required=false) protected double maxHardy = 20.0; - @Argument(fullName="maxNoCall", doc="Maximum no-call rate (as a fraction) to consider an assay valid [default:0.05]", required=false) + + /** + * To disable, set to a value greater than 1. + */ + @Argument(fullName="maxNoCall", doc="Maximum no-call rate (as a fraction) to consider an assay valid", required=false) protected double maxNoCall = 0.05; - @Argument(fullName="maxHomVar", doc="Maximum homozygous variant rate (as a fraction) to consider an assay valid [default:1.1, disabled]", required=false) + + /** + * To disable, set to a value greater than 1. + */ + @Argument(fullName="maxHomVar", doc="Maximum homozygous variant rate (as a fraction) to consider an assay valid", required=false) protected double maxHomNonref = 1.1; //@Argument(fullName="populationFile", shortName="populations", doc="A tab-delimited file relating individuals to populations,"+ @@ -93,7 +127,7 @@ public class VariantValidationAssessor extends RodWalker if ( tracker == null ) return null; - VariantContext vc = tracker.getFirstValue(variants, ref.getLocus()); + VariantContext vc = tracker.getFirstValue(variantCollection.variants, ref.getLocus()); // ignore places where we don't have a variant if ( vc == null ) return null; @@ -101,7 +135,7 @@ public class VariantValidationAssessor extends RodWalker if ( sampleNames == null ) sampleNames = new TreeSet(vc.getSampleNames()); - return addVariantInformationToCall(ref, vc); + return addVariantInformationToCall(vc); } public Integer reduce(VariantContext call, Integer numVariants) { @@ -113,7 +147,7 @@ public class VariantValidationAssessor extends RodWalker } public void onTraversalDone(Integer finalReduce) { - final List inputNames = Arrays.asList(variants.getName()); + final List inputNames = Arrays.asList(variantCollection.variants.getName()); // setup the header fields Set hInfo = new HashSet(); @@ -159,7 +193,7 @@ public class VariantValidationAssessor extends RodWalker } - private VariantContext addVariantInformationToCall(ReferenceContext ref, VariantContext vContext) { + private VariantContext addVariantInformationToCall(VariantContext vContext) { // check possible filters double hwPvalue = hardyWeinbergCalculation(vContext); From 575303ae6b2563b8bb5380926d9fe813846bb07b Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 17 Aug 2011 13:28:19 -0400 Subject: [PATCH 15/15] Renaming for consistency and bringing up to speed with new rod system --- .../gatk/walkers/qc/CountRODsByRefWalker.java | 49 ++++++++++++++++--- .../gatk/walkers/qc/CountRODsWalker.java | 8 +-- 2 files changed, 45 insertions(+), 12 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRefWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRefWalker.java index d1545f159..7c7d6417a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRefWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRefWalker.java @@ -25,7 +25,10 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -33,25 +36,55 @@ import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.collections.Pair; +import java.util.Collections; +import java.util.List; + /** - * Prints out counts of the number of reference ordered data objects are - * each locus for debugging RefWalkers. + * Prints out counts of the number of reference ordered data objects encountered. + * + * + *

Input

+ *

+ * One or more rod files. + *

+ * + *

Output

+ *

+ * Number of rods seen. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T CountRODsByRef \
+ *   -o output.txt \
+ *   --rod input.vcf
+ * 
+ * */ -public class CountRodByRefWalker extends RefWalker, Long>> { - @Argument(fullName = "verbose", shortName = "v", doc="If true, Countrod will print out detailed information about the rods it finds and locations", required = false) +public class CountRODsByRefWalker extends RefWalker, Long>> { + + /** + * One or more input rod files + */ + @Input(fullName="rod", shortName = "rod", doc="Input VCF file(s)", required=false) + public List> rods = Collections.emptyList(); + + @Argument(fullName = "verbose", shortName = "v", doc="If true, this tool will print out detailed information about the rods it finds and locations", required = false) public boolean verbose = false; - @Argument(fullName = "showSkipped", shortName = "s", doc="If true, CountRod will print out the skippped locations", required = false) + @Argument(fullName = "showSkipped", shortName = "s", doc="If true, this tool will print out the skipped locations", required = false) public boolean showSkipped = false; - CountRodWalker crw = new CountRodWalker(); + CountRODsWalker crw = new CountRODsWalker(); public void initialize() { crw.verbose = verbose; crw.showSkipped = showSkipped; } - public CountRodWalker.Datum map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public CountRODsWalker.Datum map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { return crw.map(tracker, ref, context); } @@ -59,7 +92,7 @@ public class CountRodByRefWalker extends RefWalker, Long> reduce(CountRodWalker.Datum point, Pair, Long> sum) { + public Pair, Long> reduce(CountRODsWalker.Datum point, Pair, Long> sum) { return crw.reduce(point, sum); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsWalker.java index 04d04c2c4..edbd5ff75 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsWalker.java @@ -64,13 +64,13 @@ import java.util.*; *
  * java -Xmx2g -jar GenomeAnalysisTK.jar \
  *   -R ref.fasta \
- *   -T CountRod \
+ *   -T CountRODs \
  *   -o output.txt \
  *   --rod input.vcf
  * 
* */ -public class CountRodWalker extends RodWalker, Long>> implements TreeReducible, Long>> { +public class CountRODsWalker extends RodWalker, Long>> implements TreeReducible, Long>> { @Output public PrintStream out; @@ -80,10 +80,10 @@ public class CountRodWalker extends RodWalker> rods = Collections.emptyList(); - @Argument(fullName = "verbose", shortName = "v", doc="If true, CountRod will print out detailed information about the rods it finds and locations", required = false) + @Argument(fullName = "verbose", shortName = "v", doc="If true, this tool will print out detailed information about the rods it finds and locations", required = false) public boolean verbose = false; - @Argument(fullName = "showSkipped", shortName = "s", doc="If true, CountRod will print out the skipped locations", required = false) + @Argument(fullName = "showSkipped", shortName = "s", doc="If true, this tool will print out the skipped locations", required = false) public boolean showSkipped = false; @Override