Added GatkDocs documentation

This commit is contained in:
Menachem Fromer 2011-08-17 12:21:37 -04:00
parent b3b5d608ca
commit d1bb302d12
1 changed file with 43 additions and 10 deletions

View File

@@ -23,7 +23,10 @@
*/
package org.broadinstitute.sting.gatk.walkers.phasing;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@@ -49,16 +52,46 @@ import java.util.*;
import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFromRods;
/**
* Walks along all variant ROD loci, caching a user-defined window of VariantContext sites, and then finishes phasing them when they go out of range (using upstream and downstream reads).
*
* <p>
* Performs physical phasing of SNP calls, based on sequencing reads.
* </p>
*
* <h2>Input</h2>
* <p>
* VCF file of SNP calls, BAM file of sequence reads.
* </p>
*
* <h2>Output</h2>
* <p>
* Phased VCF file.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T ReadBackedPhasing
* -R reference.fasta
* -I reads.bam
* --variant:vcf SNPs.vcf
* -BTI variant
* -BTIMR INTERSECTION
* -o phased_SNPs.vcf
* --phaseQualityThresh 20.0
* </pre>
*
* @author Menachem Fromer
* @since July 2010
*/
@Allows(value = {DataSource.READS, DataSource.REFERENCE})
@Requires(value = {DataSource.READS, DataSource.REFERENCE})
@By(DataSource.READS)
@ReadFilters({MappingQualityZeroReadFilter.class})
// Filter out all reads with zero mapping quality
@ReadFilters({MappingQualityZeroReadFilter.class})
public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, PhasingStats> {
private static final boolean DEBUG = false;
@@ -73,13 +106,13 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
@Output(doc = "File to which variants should be written", required = true)
protected VCFWriter writer = null;
@Argument(fullName = "cacheWindowSize", shortName = "cacheWindow", doc = "The window size (in bases) to cache variant sites and their reads; [default:20000]", required = false)
@Argument(fullName = "cacheWindowSize", shortName = "cacheWindow", doc = "The window size (in bases) to cache variant sites and their reads for the phasing procedure", required = false)
protected Integer cacheWindow = 20000;
@Argument(fullName = "maxPhaseSites", shortName = "maxSites", doc = "The maximum number of successive heterozygous sites permitted to be used by the phasing algorithm; [default:10]", required = false)
@Argument(fullName = "maxPhaseSites", shortName = "maxSites", doc = "The maximum number of successive heterozygous sites permitted to be used by the phasing algorithm", required = false)
protected Integer maxPhaseSites = 10; // 2^10 == 10^3 diploid haplotypes
@Argument(fullName = "phaseQualityThresh", shortName = "phaseThresh", doc = "The minimum phasing quality score required to output phasing; [default:10.0]", required = false)
@Argument(fullName = "phaseQualityThresh", shortName = "phaseThresh", doc = "The minimum phasing quality score required to output phasing", required = false)
protected Double phaseQualityThresh = 10.0; // PQ = 10.0 <=> P(error) = 10^(-10/10) = 0.1, P(correct) = 0.9
@Hidden
@@ -87,10 +120,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
protected String variantStatsFilePrefix = null;
private PhasingQualityStatsWriter statsWriter = null;
@Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for phasing [default: 17]", required = false)
@Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for phasing", required = false)
public int MIN_BASE_QUALITY_SCORE = 17;
@Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for phasing [default: 20]", required = false)
@Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for phasing", required = false)
public int MIN_MAPPING_QUALITY_SCORE = 20;
@Argument(fullName = "sampleToPhase", shortName = "sampleToPhase", doc = "Only include these samples when phasing", required = false)
@@ -111,10 +144,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
public static final String PHASING_INCONSISTENT_KEY = "PhasingInconsistent";
@Argument(fullName = "enableMergePhasedSegregatingPolymorphismsToMNP", shortName = "enableMergeToMNP", doc = "Merge consecutive phased sites into MNP records [default:false]", required = false)
@Argument(fullName = "enableMergePhasedSegregatingPolymorphismsToMNP", shortName = "enableMergeToMNP", doc = "Merge consecutive phased sites into MNP records", required = false)
protected boolean enableMergePhasedSegregatingPolymorphismsToMNP = false;
@Argument(fullName = "maxGenomicDistanceForMNP", shortName = "maxDistMNP", doc = "The maximum reference-genome distance between consecutive heterozygous sites to permit merging phased VCF records into a MNP record; [default:1]", required = false)
@Argument(fullName = "maxGenomicDistanceForMNP", shortName = "maxDistMNP", doc = "The maximum reference-genome distance between consecutive heterozygous sites to permit merging phased VCF records into a MNP record", required = false)
protected int maxGenomicDistanceForMNP = 1;
@Hidden