From be2d29ce69a2fd1b58938a9e9e47faff870c657a Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 5 Oct 2011 15:17:41 -0700 Subject: [PATCH] Final PED documentation --- .../arguments/GATKArgumentCollection.java | 66 +++++++++++-------- .../gatk/samples/PedigreeValidationType.java | 8 +++ 2 files changed, 45 insertions(+), 29 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index cd9068a64..486868dc2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -213,55 +213,63 @@ public class GATKArgumentCollection { // -------------------------------------------------------------------------------------------------------------- /** - * Reads PED file-formatted tabular text files describing meta-data about the samples being - * processed in the GATK. + *

Reads PED file-formatted tabular text files describing meta-data about the samples being + * processed in the GATK.

* - * See http://www.broadinstitute.org/mpg/tagger/faq.html - * See http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped + * * - * The PED file is a white-space (space or tab) delimited file: the first six columns are mandatory: + *

The PED file is a white-space (space or tab) delimited file: the first six columns are mandatory:

* - * Family ID - * Individual ID - * Paternal ID - * Maternal ID - * Sex (1=male; 2=female; other=unknown) - * Phenotype + * * - * The IDs are alphanumeric: the combination of family and individual ID should uniquely identify a person. + *

The IDs are alphanumeric: the combination of family and individual ID should uniquely identify a person. * A PED file must have 1 and only 1 phenotype in the sixth column. The phenotype can be either a * quantitative trait or an affection status column: GATK will automatically detect which type - * (i.e. based on whether a value other than 0, 1, 2 or the missing genotype code is observed). + * (i.e. based on whether a value other than 0, 1, 2 or the missing genotype code is observed).

* - * If an individual's sex is unknown, then any character other than 1 or 2 can be used. + *

If an individual's sex is unknown, then any character other than 1 or 2 can be used.

* - * You can add a comment to a PED or MAP file by starting the line with a # character. The rest of that - * line will be ignored. Do not start any family IDs with this character therefore. + *

You can add a comment to a PED or MAP file by starting the line with a # character. The rest of that + * line will be ignored. Therefore, do not start any family IDs with this character.

* - * Affection status should be coded: + *

Affection status should be coded:

* - * -9 missing - * 0 missing - * 1 unaffected - * 2 affected + * * - * If any value outside of -9,0,1,2 is detected than the samples are assumed + *

If any value outside of -9,0,1,2 is detected, then the * phenotype values are interpreted as string phenotype values. In this case -9 uniquely - * represents the missing value. + * represents the missing value.

* - * Genotypes (column 7 onwards) cannot be specified to the GATK. + *

Genotypes (column 7 onwards) cannot be specified to the GATK.

* - * For example, here are two individuals (one row = one person): + *

For example, here are two individuals (one row = one person):

* + *
      *   FAM001  1  0 0  1  2
      *   FAM001  2  0 0  1  2
+     * 
* - * Each -ped argument can be tagged with NO_FAMILY_ID, NO_PARENTS, NO_SEX, NO_PHENOTYPE to - * tell the GATK PED parser that the corresponding fields are missing from the ped file. + *

Each -ped argument can be tagged with NO_FAMILY_ID, NO_PARENTS, NO_SEX, NO_PHENOTYPE to + * tell the GATK PED parser that the corresponding fields are missing from the ped file.

* - * Note that most GATK walkers do not use pedigree information. Walkers that require pedigree + *

Note that most GATK walkers do not use pedigree information. Walkers that require pedigree * data should clearly indicate so in their arguments and will throw errors if required pedigree - * information is missing. + * information is missing.

*/ @Argument(fullName="pedigree", shortName = "ped", doc="Pedigree files for samples",required=false) public List pedigreeFiles = Collections.emptyList(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/PedigreeValidationType.java b/public/java/src/org/broadinstitute/sting/gatk/samples/PedigreeValidationType.java index 209636b54..bbf857820 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/samples/PedigreeValidationType.java +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/PedigreeValidationType.java @@ -28,6 +28,14 @@ package org.broadinstitute.sting.gatk.samples; * */ public enum PedigreeValidationType { + /** + * Require, if a pedigree file is provided, that all samples in the VCF or BAM files have a corresponding + * entry in the pedigree file(s). + */ STRICT, + + /** + * Do not enforce any overlap between the VCF/BAM samples and the pedigree data. + * */ SILENT }