Adding docs to 3 more walkers

This commit is contained in:
Eric Banks 2011-08-17 12:35:08 -04:00
parent b3b5d608ca
commit a21e193a9e
3 changed files with 89 additions and 8 deletions

View File

@ -35,16 +35,46 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
/**
* Left aligns indels in reads.
* Left-aligns indels from reads in a bam file.
*
* <p>
* LeftAlignIndels is a tool that takes a bam file and left-aligns any indels inside it. The same indel can often be
* placed at multiple positions and still represent the same haplotype. While a standard convention is to place an
* indel at the left-most position, this doesn't always happen, so this tool can be used to left-align them.
* </p>
*
* <h2>Input</h2>
* <p>
* A bam file to left-align.
* </p>
*
* <h2>Output</h2>
* <p>
* A left-aligned bam.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx3g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T LeftAlignIndels \
* -I input.bam \
* -o output.bam
* </pre>
*
*/
public class LeftAlignIndels extends ReadWalker<Integer, Integer> {
@Output(required=false, doc="Output bam")
protected StingSAMFileWriter writer = null;
@Argument(fullName="maxReadsInRam", shortName="maxInRam", doc="max reads allowed to be kept in memory at a time by the SAMFileWriter. "+
"If too low, the tool may run out of system file descriptors needed to perform sorting; if too high, the tool may run out of memory.", required=false)
/**
* If set too low, the tool may run out of system file descriptors needed to perform sorting; if too high, the tool
* may run out of memory. We recommend that you additionally tell Java to use a temp directory with plenty of available
* space (by setting java.io.tmpdir on the command-line).
*/
@Argument(fullName="maxReadsInRam", shortName="maxInRam", doc="max reads allowed to be kept in memory at a time by the output writer", required=false)
protected int MAX_RECORDS_IN_RAM = 500000;
public void initialize() {

View File

@ -46,6 +46,31 @@ import java.util.*;
/**
* Left-aligns indels from a variants file.
*
* <p>
* LeftAlignVariants is a tool that takes a VCF file and left-aligns any indels inside it. The same indel can often be
* placed at multiple positions and still represent the same haplotype. While the standard convention with VCF is to
* place an indel at the left-most position, this doesn't always happen, so this tool can be used to left-align them.
* </p>
*
* <h2>Input</h2>
* <p>
* A variant set to left-align.
* </p>
*
* <h2>Output</h2>
* <p>
* A left-aligned VCF.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T LeftAlignVariants \
* --variant input.vcf \
* -o output.vcf
* </pre>
*
*/
@Reference(window=@Window(start=-200,stop=200))
public class LeftAlignVariants extends RodWalker<Integer, Integer> {

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broad.tribble.Feature;
import org.broad.tribble.TribbleException;
import org.broad.tribble.dbsnp.DbSNPFeature;
import org.broadinstitute.sting.commandline.*;
@ -34,7 +33,6 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
@ -48,7 +46,32 @@ import java.util.Set;
/**
* Validates a variants file.
* Strictly validates a variants file.
*
* <p>
* ValidateVariants is a GATK tool that takes a VCF file and validates much of the information inside it.
* Checks include the correctness of the reference base(s), accuracy of AC &amp; AN values, tests against rsIDs
* when a dbSNP file is provided, and that all alternate alleles are present in at least one sample.
* </p>
*
* <h2>Input</h2>
* <p>
* A variant set to validate.
* </p>
*
* <h2>Output</h2>
* <p>
* No VCF is written; problems found are reported as errors (or as warnings when warnOnErrors is set).
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T ValidateVariants \
* --variant input.vcf \
* --dbsnp dbsnp.vcf
* </pre>
*
*/
@Reference(window=@Window(start=0,stop=100))
public class ValidateVariants extends RodWalker<Integer, Integer> {
@ -67,10 +90,13 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
@Argument(fullName = "validationType", shortName = "type", doc = "which validation type to run", required = false)
protected ValidationType type = ValidationType.ALL;
@Argument(fullName = "doNotValidateFilteredRecords", shortName = "doNotValidateFilteredRecords", doc = "should we skip validation on filtered records?", required = false)
/**
* By default, even filtered records are validated.
*/
@Argument(fullName = "doNotValidateFilteredRecords", shortName = "doNotValidateFilteredRecords", doc = "skip validation on filtered records", required = false)
protected Boolean DO_NOT_VALIDATE_FILTERED = false;
@Argument(fullName = "warnOnErrors", shortName = "warnOnErrors", doc = "should we just emit warnings on errors instead of terminating the run?", required = false)
@Argument(fullName = "warnOnErrors", shortName = "warnOnErrors", doc = "just emit warnings on errors instead of terminating the run at the first instance", required = false)
protected Boolean WARN_ON_ERROR = false;
private long numErrors = 0;