From 06d78ba0680bf3ba306d01680cef5e525ca26304 Mon Sep 17 00:00:00 2001 From: Laura Gauthier Date: Fri, 4 Apr 2014 10:27:09 -0400 Subject: [PATCH] Expanded documentation to include description of which callsets are being compared in what order and more definitions --- .../variantutils/GenotypeConcordance.java | 104 ++++++++++++------ 1 file changed, 69 insertions(+), 35 deletions(-) diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java index 08c938583..1bef3134a 100755 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java @@ -68,49 +68,62 @@ import java.util.*; *

Output

*

* Genotype Concordance writes a GATK report to the specified file (via -o), consisting of multiple tables of counts - * and proportions. These tables are constructed on a per-sample basis, and include counts of EVAL vs COMP genotype states, and the - * number of times the alternate alleles between the EVAL and COMP sample did not match up. + * and proportions. These tables are constructed on a per-sample basis, and include counts of EVAL vs COMP genotype + * states. + *

+ *

Tables

+ *

+ * Headers for the (non-moltenized -- see below) GenotypeConcordance counts and proportions tables give the genotype of + * the COMP callset followed by the genotype of the EVAL callset. For example, the value corresponding to HOM_REF_HET + * reflects variants called HOM_REF in the COMP callset and HET in the EVAL callset. Variants for which the alternate + * alleles between the EVAL and COMP sample did not match are excluded from genotype comparisons and reported in the + * "Mismatching_Alleles" field. + *

+ *

+ * It may be informative to reshape rows of the GenotypeConcordance counts and proportions tables into separate row-major tables + * where the columns indicate the COMP genotype and the rows indicate the EVAL genotype for easy comparison between the + * two callsets. This can be done with a command similar to d <- matrix(sampleRow,nrow=6,byrow=T) in R, where sampleRow is the 36-value row corresponding to the sample of interest, excluding "Mismatching_Alleles". + * In Excel, this can be accomplished using the OFFSET function. + *

+ * *

* *

Term and metrics definitions

*

*

*

* - *

Moltenized tables

- * - *

These tables may be optionally moltenized via the -moltenize argument. That is, the standard table - * - *

- *  Sample   NO_CALL_HOM_REF  NO_CALL_HET  NO_CALL_HOM_VAR   (...)
- *  NA12878       0.003        0.001            0.000        (...)
- *  NA12891       0.005        0.000            0.000        (...)
- *  
- * - * would instead be displayed - * - *
- *  NA12878  NO_CALL_HOM_REF   0.003
- *  NA12878  NO_CALL_HET       0.001
- *  NA12878  NO_CALL_HOM_VAR   0.000
- *  NA12891  NO_CALL_HOM_REF   0.005
- *  NA12891  NO_CALL_HET       0.000
- *  NA12891  NO_CALL_HOM_VAR   0.000
- *  (...)
- *  
- * *

Site-level allelic concordance

* *

@@ -158,6 +171,27 @@ import java.util.*; * in which case all records are used. There is currently no way to assess concordance metrics on filtered sites * exclusively. SelectVariants can be used to extract filtered sites, and VariantFiltration used to un-filter them. * + *

Moltenized tables

+ * + *

These tables may be optionally moltenized via the -moltenize argument. That is, the standard table + * + *

+ *  Sample   NO_CALL_HOM_REF  NO_CALL_HET  NO_CALL_HOM_VAR   (...)
+ *  NA12878       0.003        0.001            0.000        (...)
+ *  NA12891       0.005        0.000            0.000        (...)
+ *  
+ * + * would instead be displayed + * + *
+ *  NA12878  NO_CALL_HOM_REF   0.003
+ *  NA12878  NO_CALL_HET       0.001
+ *  NA12878  NO_CALL_HOM_VAR   0.000
+ *  NA12891  NO_CALL_HOM_REF   0.005
+ *  NA12891  NO_CALL_HET       0.000
+ *  NA12891  NO_CALL_HOM_VAR   0.000
+ *  (...)
+ *  
*/ @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )