Adding docs.

2013-01-22 15:29:33 -05:00 · 2013-01-22 15:29:33 -05:00 · d33c755aea
parent 7060e01a8e
commit d33c755aea
1 changed files with 49 additions and 1 deletions
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java
@@ -62,27 +62,75 @@ import java.io.PrintStream;
 import java.util.*;

 /**
- * A simple walker for performing genotype concordance calculations between two callsets
+ * A simple walker for performing genotype concordance calculations between two callsets. Outputs a GATK table with
+ * per-sample and aggregate counts and frequencies, a summary table for NRD/NRS, and a table for site allele overlaps.
+ *
+ * <p>
+ *  Genotype concordance takes in two callsets (VCFs) and tabulates the number of sites which overlap and share alleles,
+ *  and for each sample, the genotype-by-genotype counts (for instance, the number of sites at which a sample was
+ *  called homozygous reference in the EVAL callset, but homozygous variant in the COMP callset). It outputs these
+ *  counts as well as convenient proportions (such as the proportion of het calls in the EVAL which were called REF in
+ *  the COMP) and metrics (such as NRD and NRS).
+ *
+ *  <h2> INPUT </h2>
+ *  <p>
+ *  Genotype concordance requires two callsets (as it does a comparison): an EVAL and a COMP callset, specified via
+ *  the -eval and -comp arguments.
+ *  <p>
+ *  (Optional) JEXL expressions for genotype-level filtering of EVAL or COMP genotypes, specified via the -gfe and
+ *  -gfc arguments, respectively.
+ *
+ *  <h2> OUTPUT </h2>
+ *  Genotype Concordance writes a GATK report to the file specified via -o, consisting of multiple tables of counts
+ *  and proportions. These tables may be optionally moltenized via the -moltenize argument.
+ *
 */
 public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,VariantContext>>,ConcordanceMetrics> {

+    /**
+     * The callset you want to evaluate; typically this is where you'd put 'unassessed' callsets.
+     */
    @Input(fullName="eval",shortName="eval",doc="The variants and genotypes to evaluate",required=true)
    RodBinding<VariantContext> evalBinding;

+    /**
+     * The callset you want to treat as 'truth'. Can also be of unknown quality for the sake of callset comparisons.
+     */
    @Input(fullName="comp",shortName="comp",doc="The variants and genotypes to compare against",required=true)
    RodBinding<VariantContext> compBinding;

+    /**
+     * If set, the FILTER field of the eval and comp VCFs will be ignored. If this flag is not included, all filtered
+     * sites will be treated as not being present in the VCF. (That is, the genotypes will be assigned UNAVAILABLE, as
+     * distinct from NO_CALL.)
+     */
    @Argument(fullName="ignoreFilters",doc="Filters will be ignored",required=false)
    boolean ignoreFilters = false;

+    /**
+     * A genotype level JEXL expression to apply to eval genotypes. Genotypes filtered in this way will be replaced by NO_CALL.
+     * For instance: -gfe 'GQ<20' will set to no-call any genotype with genotype quality less than 20.
+     */
    @Argument(shortName="gfe", fullName="genotypeFilterExpressionEval", doc="One or more criteria to use to set EVAL genotypes to no-call. "+
            "These genotype-level filters are only applied to the EVAL rod.", required=false)
    public ArrayList<String> genotypeFilterExpressionsEval = new ArrayList<String>();

+    /**
+     * Identical to -gfe except the filter is applied to genotypes in the comp rod.
+     */
    @Argument(shortName="gfc", fullName="genotypeFilterExpressionComp", doc="One or more criteria to use to set COMP genotypes to no-call. "+
            "These genotype-level filters are only applied to the COMP rod.", required=false)
    public ArrayList<String> genotypeFilterExpressionsComp = new ArrayList<String>();

+    /**
+     * Moltenize the count and proportion tables. Rather than moltenizing per-sample data into a 2x2 table, it is fully
+     * moltenized into elements. That is, WITHOUT this argument, each row of the table begins with the sample name and
+     * proceeds directly with counts/proportions of eval/comp counts (for instance HOM_REF/HOM_REF, HOM_REF/NO_CALL).
+     *
+     * If the -moltenize argument is given, each output row will begin with a sample name, followed by the contrastive
+     * genotype type (such as HOM_REF/HOM_REF), followed by the count or proportion. This will significantly increase the
+     * number of rows.
+     */
    @Argument(shortName="moltenize",fullName="moltenize",doc="Molten rather than tabular output")
    public boolean moltenize = false;