Renaming quantitative trait to OtherPhenotype, which is now a String, not a double

-- we can now use a PED file to represent population data or other arbitrary phenotype data, not just doubles
This commit is contained in:
Mark DePristo 2011-10-05 12:26:33 -07:00
parent 51ecc20867
commit e7c80f7c45
6 changed files with 43 additions and 53 deletions

View File

@ -28,8 +28,8 @@ package org.broadinstitute.sting.gatk.samples;
* Categorical sample trait for association and analysis * Categorical sample trait for association and analysis
* *
* Samples can have unknown status, be affected or unaffected by the * Samples can have unknown status, be affected or unaffected by the
* categorical trait, or they can be marked as actually having a * categorical trait, or they can be marked as actually having an
* quantitative trait value (stored in an associated value in the Sample class) * other trait value (stored in an associated value in the Sample class)
* *
* @author Mark DePristo * @author Mark DePristo
* @since Sept. 2011 * @since Sept. 2011
@ -41,6 +41,6 @@ public enum Affection {
AFFECTED, AFFECTED,
/** Unaffected by the disease */ /** Unaffected by the disease */
UNAFFECTED, UNAFFECTED,
/** A quantitative trait: value of the trait is stored elsewhere */ /** An "other" trait: value of the trait is stored elsewhere and is an arbitrary string */
QUANTITATIVE OTHER
} }

View File

@ -63,6 +63,8 @@ import java.util.*;
* A PED file must have 1 and only 1 phenotype in the sixth column. The phenotype can be either a * A PED file must have 1 and only 1 phenotype in the sixth column. The phenotype can be either a
* quantitative trait or an affection status column: PLINK will automatically detect which type * quantitative trait or an affection status column: PLINK will automatically detect which type
* (i.e. based on whether a value other than 0, 1, 2 or the missing genotype code is observed). * (i.e. based on whether a value other than 0, 1, 2 or the missing genotype code is observed).
Note that the GATK actually supports arbitrary values for the quantitative trait -- not just doubles --
and represents these values as strings instead of doubles
* *
* NOTE Quantitative traits with decimal points must be coded with a period/full-stop character and * NOTE Quantitative traits with decimal points must be coded with a period/full-stop character and
* not a comma, i.e. 2.394 not 2,394 * not a comma, i.e. 2.394 not 2,394
@ -212,7 +214,7 @@ public class PedReader {
splits.add(parts); splits.add(parts);
lineNo++; lineNo++;
} }
logger.info("Trait is quantitative? " + isQT); logger.info("Phenotype is other? " + isQT);
// now go through and parse each record // now go through and parse each record
lineNo = 1; lineNo = 1;
@ -220,7 +222,7 @@ public class PedReader {
for ( final String[] parts : splits ) { for ( final String[] parts : splits ) {
String familyID = null, individualID, paternalID = null, maternalID = null; String familyID = null, individualID, paternalID = null, maternalID = null;
Gender sex = Gender.UNKNOWN; Gender sex = Gender.UNKNOWN;
double quantitativePhenotype = Sample.UNSET_QT; String quantitativePhenotype = Sample.UNSET_QT;
Affection affection = Affection.UNKNOWN; Affection affection = Affection.UNKNOWN;
if ( familyPos != -1 ) familyID = maybeMissing(parts[familyPos]); if ( familyPos != -1 ) familyID = maybeMissing(parts[familyPos]);
@ -239,8 +241,8 @@ public class PedReader {
if ( parts[phenotypePos].equals(MISSING_VALUE1) ) if ( parts[phenotypePos].equals(MISSING_VALUE1) )
affection = Affection.UNKNOWN; affection = Affection.UNKNOWN;
else { else {
affection = Affection.QUANTITATIVE; affection = Affection.OTHER;
quantitativePhenotype = Double.valueOf(parts[phenotypePos]); quantitativePhenotype = parts[phenotypePos];
} }
} else { } else {
if ( parts[phenotypePos].equals(MISSING_VALUE1) ) affection = Affection.UNKNOWN; if ( parts[phenotypePos].equals(MISSING_VALUE1) ) affection = Affection.UNKNOWN;

View File

@ -12,22 +12,22 @@ import java.util.Map;
public class Sample implements Comparable<Sample> { // implements java.io.Serializable { public class Sample implements Comparable<Sample> { // implements java.io.Serializable {
final private String familyID, paternalID, maternalID; final private String familyID, paternalID, maternalID;
final private Gender gender; final private Gender gender;
final private double quantitativePhenotype; final private String otherPhenotype;
final private Affection affection; final private Affection affection;
final private String ID; final private String ID;
final private SampleDB infoDB; final private SampleDB infoDB;
final private Map<String, Object> properties = new HashMap<String, Object>(); final private Map<String, Object> properties = new HashMap<String, Object>();
public final static double UNSET_QT = Double.NaN; public final static String UNSET_QT = null;
public Sample(final String ID, final SampleDB infoDB, public Sample(final String ID, final SampleDB infoDB,
final String familyID, final String paternalID, final String maternalID, final String familyID, final String paternalID, final String maternalID,
final Gender gender, final Affection affection, final double quantitativePhenotype) { final Gender gender, final Affection affection, final String otherPhenotype) {
this.familyID = familyID; this.familyID = familyID;
this.paternalID = paternalID; this.paternalID = paternalID;
this.maternalID = maternalID; this.maternalID = maternalID;
this.gender = gender; this.gender = gender;
this.quantitativePhenotype = quantitativePhenotype; this.otherPhenotype = otherPhenotype;
this.affection = affection; this.affection = affection;
this.ID = ID; this.ID = ID;
this.infoDB = infoDB; this.infoDB = infoDB;
@ -35,8 +35,8 @@ public class Sample implements Comparable<Sample> { // implements java.io.Serial
protected Sample(final String ID, protected Sample(final String ID,
final String familyID, final String paternalID, final String maternalID, final String familyID, final String paternalID, final String maternalID,
final Gender gender, final Affection affection, final double quantitativePhenotype) { final Gender gender, final Affection affection, final String otherPhenotype) {
this(ID, null, familyID, paternalID, maternalID, gender, affection, quantitativePhenotype); this(ID, null, familyID, paternalID, maternalID, gender, affection, otherPhenotype);
} }
protected Sample(final String ID, protected Sample(final String ID,
@ -51,8 +51,8 @@ public class Sample implements Comparable<Sample> { // implements java.io.Serial
this(ID, infoDB, familyID, paternalID, maternalID, gender, Affection.UNKNOWN, UNSET_QT); this(ID, infoDB, familyID, paternalID, maternalID, gender, Affection.UNKNOWN, UNSET_QT);
} }
public Sample(final String ID, final SampleDB infoDB, final Affection affection, final double quantitativePhenotype) { public Sample(final String ID, final SampleDB infoDB, final Affection affection, final String otherPhenotype) {
this(ID, infoDB, null, null, null, Gender.UNKNOWN, affection, quantitativePhenotype); this(ID, infoDB, null, null, null, Gender.UNKNOWN, affection, otherPhenotype);
} }
public Sample(String id, SampleDB infoDB) { public Sample(String id, SampleDB infoDB) {
@ -86,12 +86,12 @@ public class Sample implements Comparable<Sample> { // implements java.io.Serial
return affection; return affection;
} }
public boolean hasQuantitativeTrait() { public boolean hasOtherPhenotype() {
return affection == Affection.QUANTITATIVE; return affection == Affection.OTHER;
} }
public double getQuantitativePhenotype() { public String getOtherPhenotype() {
return quantitativePhenotype; return otherPhenotype;
} }
/** /**
@ -127,7 +127,7 @@ public class Sample implements Comparable<Sample> { // implements java.io.Serial
public String toString() { public String toString() {
return String.format("Sample %s fam=%s dad=%s mom=%s gender=%s affection=%s qt=%s props=%s", return String.format("Sample %s fam=%s dad=%s mom=%s gender=%s affection=%s qt=%s props=%s",
getID(), getFamilyID(), getPaternalID(), getMaternalID(), getGender(), getAffection(), getID(), getFamilyID(), getPaternalID(), getMaternalID(), getGender(), getAffection(),
getQuantitativePhenotype(), properties); getOtherPhenotype(), properties);
} }
// // ------------------------------------------------------------------------------------- // // -------------------------------------------------------------------------------------
@ -174,7 +174,7 @@ public class Sample implements Comparable<Sample> { // implements java.io.Serial
equalOrNull(paternalID, otherSample.paternalID) && equalOrNull(paternalID, otherSample.paternalID) &&
equalOrNull(maternalID, otherSample.maternalID) && equalOrNull(maternalID, otherSample.maternalID) &&
equalOrNull(gender, otherSample.gender) && equalOrNull(gender, otherSample.gender) &&
equalOrNull(quantitativePhenotype, otherSample.quantitativePhenotype) && equalOrNull(otherPhenotype, otherSample.otherPhenotype) &&
equalOrNull(affection, otherSample.affection) && equalOrNull(affection, otherSample.affection) &&
equalOrNull(properties, otherSample.properties); equalOrNull(properties, otherSample.properties);
} }
@ -215,7 +215,7 @@ public class Sample implements Comparable<Sample> { // implements java.io.Serial
mergeValues(prev.getID(), "Material_ID", prev.getMaternalID(), next.getMaternalID(), null), mergeValues(prev.getID(), "Material_ID", prev.getMaternalID(), next.getMaternalID(), null),
mergeValues(prev.getID(), "Gender", prev.getGender(), next.getGender(), Gender.UNKNOWN), mergeValues(prev.getID(), "Gender", prev.getGender(), next.getGender(), Gender.UNKNOWN),
mergeValues(prev.getID(), "Affection", prev.getAffection(), next.getAffection(), Affection.UNKNOWN), mergeValues(prev.getID(), "Affection", prev.getAffection(), next.getAffection(), Affection.UNKNOWN),
mergeValues(prev.getID(), "QuantitativeTrait", prev.getQuantitativePhenotype(), next.getQuantitativePhenotype(), UNSET_QT)); mergeValues(prev.getID(), "OtherPhenotype", prev.getOtherPhenotype(), next.getOtherPhenotype(), UNSET_QT));
//mergeValues(prev.getID(), "ExtraProperties", prev.getExtraProperties(), next.getExtraProperties(), Collections.emptyMap())); //mergeValues(prev.getID(), "ExtraProperties", prev.getExtraProperties(), next.getExtraProperties(), Collections.emptyMap()));
} }
} }

View File

@ -8,17 +8,6 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype;
import java.util.*; import java.util.*;
/** /**
* Created by IntelliJ IDEA.
* User: brett
* Date: Jul 26, 2010
* Time: 3:30:09 PM
*
* This class stores and manages sample metadata. This data is encoded in a sample file, which can be included
* in the GATK by the "--samples" argument. This class reads and parses those files.
*
* Although there are a set of public methods for accessing sample data, they aren't used by walkers - they are really
* only used by GenomeAnalysisEngine. An instance of GenomeAnalysisEngine has one SampleDataSource. When a walker
* wants to access sample data, it asks GenomeAnalysis to fetch this data from its SampleDataSource.
* *
*/ */
public class SampleDB { public class SampleDB {

View File

@ -156,26 +156,26 @@ public class PedReaderUnitTest extends BaseTest {
"fam1 mom granddad2 grandma2 2 2")); "fam1 mom granddad2 grandma2 2 2"));
// Quantitative trait // Quantitative trait
new PedReaderTest("QuantitativeTrait", new PedReaderTest("OtherPhenotype",
Arrays.asList( Arrays.asList(
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0), new Sample("s1", "fam1", null, null, Gender.MALE, Affection.OTHER, "1.0"),
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)), new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.OTHER, "10.0")),
String.format("%s%n%s", String.format("%s%n%s",
"fam1 s1 0 0 1 1", "fam1 s1 0 0 1 1",
"fam2 s2 0 0 2 10.0")); "fam2 s2 0 0 2 10.0"));
new PedReaderTest("QuantitativeTraitWithMissing", new PedReaderTest("OtherPhenotypeWithMissing",
Arrays.asList( Arrays.asList(
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN, Sample.UNSET_QT), new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN, Sample.UNSET_QT),
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)), new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.OTHER, "10.0")),
String.format("%s%n%s", String.format("%s%n%s",
"fam1 s1 0 0 1 -9", "fam1 s1 0 0 1 -9",
"fam2 s2 0 0 2 10.0")); "fam2 s2 0 0 2 10.0"));
new PedReaderTest("QuantitativeTraitOnlyInts", new PedReaderTest("OtherPhenotypeOnlyInts",
Arrays.asList( Arrays.asList(
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0), new Sample("s1", "fam1", null, null, Gender.MALE, Affection.OTHER, "1"),
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)), new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.OTHER, "10")),
String.format("%s%n%s", String.format("%s%n%s",
"fam1 s1 0 0 1 1", "fam1 s1 0 0 1 1",
"fam2 s2 0 0 2 10")); "fam2 s2 0 0 2 10"));

View File

@ -6,16 +6,13 @@ import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test; import org.testng.annotations.Test;
/** /**
* Created by IntelliJ IDEA. *
* User: brett
* Date: Sep 9, 2010
* Time: 8:21:00 AM
*/ */
public class SampleUnitTest extends BaseTest { public class SampleUnitTest extends BaseTest {
SampleDB db; SampleDB db;
static Sample fam1A, fam1B, fam1C; static Sample fam1A, fam1B, fam1C;
static Sample s1, s2; static Sample s1, s2;
static Sample trait1, trait2, trait3, trait4; static Sample trait1, trait2, trait3, trait4, trait5;
@BeforeClass @BeforeClass
public void init() { public void init() {
@ -31,7 +28,8 @@ public class SampleUnitTest extends BaseTest {
trait1 = new Sample("t1", db, Affection.AFFECTED, Sample.UNSET_QT); trait1 = new Sample("t1", db, Affection.AFFECTED, Sample.UNSET_QT);
trait2 = new Sample("t2", db, Affection.UNAFFECTED, Sample.UNSET_QT); trait2 = new Sample("t2", db, Affection.UNAFFECTED, Sample.UNSET_QT);
trait3 = new Sample("t3", db, Affection.UNKNOWN, Sample.UNSET_QT); trait3 = new Sample("t3", db, Affection.UNKNOWN, Sample.UNSET_QT);
trait4 = new Sample("t4", db, Affection.QUANTITATIVE, 1.0); trait4 = new Sample("t4", db, Affection.OTHER, "1.0");
trait5 = new Sample("t4", db, Affection.OTHER, "CEU");
} }
/** /**
@ -47,13 +45,14 @@ public class SampleUnitTest extends BaseTest {
Assert.assertEquals(null, fam1B.getMaternalID()); Assert.assertEquals(null, fam1B.getMaternalID());
Assert.assertEquals(Affection.AFFECTED, trait1.getAffection()); Assert.assertEquals(Affection.AFFECTED, trait1.getAffection());
Assert.assertEquals(Sample.UNSET_QT, trait1.getQuantitativePhenotype()); Assert.assertEquals(Sample.UNSET_QT, trait1.getOtherPhenotype());
Assert.assertEquals(Affection.UNAFFECTED, trait2.getAffection()); Assert.assertEquals(Affection.UNAFFECTED, trait2.getAffection());
Assert.assertEquals(Sample.UNSET_QT, trait2.getQuantitativePhenotype()); Assert.assertEquals(Sample.UNSET_QT, trait2.getOtherPhenotype());
Assert.assertEquals(Affection.UNKNOWN, trait3.getAffection()); Assert.assertEquals(Affection.UNKNOWN, trait3.getAffection());
Assert.assertEquals(Sample.UNSET_QT, trait3.getQuantitativePhenotype()); Assert.assertEquals(Sample.UNSET_QT, trait3.getOtherPhenotype());
Assert.assertEquals(Affection.QUANTITATIVE, trait4.getAffection()); Assert.assertEquals(Affection.OTHER, trait4.getAffection());
Assert.assertEquals(1.0, trait4.getQuantitativePhenotype()); Assert.assertEquals("1.0", trait4.getOtherPhenotype());
Assert.assertEquals("CEU", trait5.getOtherPhenotype());
} }
@Test() @Test()