100% version of PedReader

-- Passes all unit tests
-- Added unit tests for missing fields
This commit is contained in:
Mark DePristo 2011-10-03 06:12:58 -07:00
parent dd75ad9f49
commit 52f670c8b8
2 changed files with 114 additions and 70 deletions

View File

@ -117,13 +117,17 @@ public class PedReader {
final static private Set<String> CATAGORICAL_TRAIT_VALUES = new HashSet<String>(Arrays.asList("-9", "0", "1", "2"));
final static private String commentMarker = "#";
public enum MissingPedFields {
public enum MissingPedField {
NO_FAMILY_ID,
NO_PARENTS,
NO_SEX,
NO_PHENOTYPE
}
/**
 * Names the individual fields of a PED record.
 *
 * The declaration order is significant: code that maps a field to a
 * whitespace-separated column index does so through ordinal(), so
 * constants must not be reordered.
 */
protected enum Field {
FAMILY_ID, INDIVIDUAL_ID, PATERNAL_ID, MATERNAL_ID, GENDER, PHENOTYPE
}
// phenotype
private final static String MISSING_VALUE1 = "-9";
private final static String MISSING_VALUE2 = "0";
@ -137,21 +141,21 @@ public class PedReader {
public PedReader() { }
public final List<Sample> parse(File source, EnumSet<MissingPedFields> missingFields, SampleDataSource sampleDB) throws FileNotFoundException {
public final List<Sample> parse(File source, EnumSet<MissingPedField> missingFields, SampleDataSource sampleDB) throws FileNotFoundException {
logger.info("Reading PED file " + source + " with missing fields: " + missingFields);
return parse(new FileReader(source), missingFields, sampleDB);
}
public final List<Sample> parse(Reader reader, EnumSet<MissingPedFields> missingFields, SampleDataSource sampleDB) {
public final List<Sample> parse(Reader reader, EnumSet<MissingPedField> missingFields, SampleDataSource sampleDB) {
final List<String> lines = new XReadLines(reader).readLines();
// What are the record offsets?
final int familyPos = missingFields.contains(MissingPedFields.NO_FAMILY_ID) ? -1 : 0;
final int familyPos = missingFields.contains(MissingPedField.NO_FAMILY_ID) ? -1 : 0;
final int samplePos = familyPos + 1;
final int paternalPos = missingFields.contains(MissingPedFields.NO_PARENTS) ? -1 : samplePos + 1;
final int maternalPos = missingFields.contains(MissingPedFields.NO_PARENTS) ? -1 : paternalPos + 1;
final int sexPos = missingFields.contains(MissingPedFields.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1;
final int phenotypePos = missingFields.contains(MissingPedFields.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1;
final int paternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : samplePos + 1;
final int maternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : paternalPos + 1;
final int sexPos = missingFields.contains(MissingPedField.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1;
final int phenotypePos = missingFields.contains(MissingPedField.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1;
final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)) + 1;
// go through once and determine properties

View File

@ -26,15 +26,14 @@ package org.broadinstitute.sting.gatk.samples;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.StringReader;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.lang.reflect.Array;
import java.util.*;
/**
* UnitTest for PedReader
@ -48,7 +47,7 @@ public class PedReaderUnitTest extends BaseTest {
private class PedReaderTest extends TestDataProvider {
public String fileContents;
public List<Sample> expectedSamples;
EnumSet<PedReader.MissingPedFields> missing;
EnumSet<PedReader.MissingPedField> missing;
private PedReaderTest(final String name, final List<Sample> expectedSamples, final String fileContents) {
super(PedReaderTest.class, name);
@ -57,19 +56,6 @@ public class PedReaderUnitTest extends BaseTest {
}
}
private class PedReaderTestMissing extends TestDataProvider {
public String fileContents;
public List<Sample> expectedSamples;
EnumSet<PedReader.MissingPedFields> missing;
private PedReaderTestMissing(final String name, EnumSet<PedReader.MissingPedFields> missing, final List<Sample> expectedSamples, final String fileContents) {
super(PedReaderTest.class, name);
this.fileContents = fileContents;
this.expectedSamples = expectedSamples;
this.missing = missing;
}
}
// Family ID
// Individual ID
// Paternal ID
@ -115,17 +101,17 @@ public class PedReaderUnitTest extends BaseTest {
new PedReaderTest("multipleUnrelated",
Arrays.asList(
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.AFFECTED)),
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.AFFECTED)),
String.format("%s%n%s",
"fam1 s1 0 0 1 1",
"fam2 s2 0 0 2 2"));
new PedReaderTest("explicitTrio",
Arrays.asList(
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)),
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)),
String.format("%s%n%s%n%s",
"fam1 kid dad mom 1 2",
"fam1 dad 0 0 1 1",
@ -133,29 +119,29 @@ public class PedReaderUnitTest extends BaseTest {
new PedReaderTest("implicitTrio",
Arrays.asList(
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
"fam1 kid dad mom 1 2");
new PedReaderTest("partialTrio",
Arrays.asList(
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
String.format("%s%n%s",
"fam1 kid dad mom 1 2",
"fam1 dad 0 0 1 1"));
new PedReaderTest("bigPedigree",
Arrays.asList(
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
new Sample("dad", "fam1", "granddad1", "grandma1", Gender.MALE, Affection.UNAFFECTED),
new Sample("granddad1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
new Sample("grandma1", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN),
new Sample("mom", "fam1", "granddad2", "grandma2", Gender.FEMALE, Affection.AFFECTED),
new Sample("granddad2", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
new Sample("grandma2", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
new Sample("dad", "fam1", "granddad1", "grandma1", Gender.MALE, Affection.UNAFFECTED),
new Sample("granddad1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
new Sample("grandma1", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN),
new Sample("mom", "fam1", "granddad2", "grandma2", Gender.FEMALE, Affection.AFFECTED),
new Sample("granddad2", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
new Sample("grandma2", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
String.format("%s%n%s%n%s",
"fam1 kid dad mom 1 2",
"fam1 dad granddad1 grandma1 1 1",
@ -164,24 +150,24 @@ public class PedReaderUnitTest extends BaseTest {
// Quantitative trait
new PedReaderTest("QuantitativeTrait",
Arrays.asList(
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0),
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0),
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
String.format("%s%n%s",
"fam1 s1 0 0 1 1",
"fam2 s2 0 0 2 10.0"));
new PedReaderTest("QuantitativeTraitWithMissing",
Arrays.asList(
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN, Sample.UNSET_QT),
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN, Sample.UNSET_QT),
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
String.format("%s%n%s",
"fam1 s1 0 0 1 -9",
"fam2 s2 0 0 2 10.0"));
new PedReaderTest("QuantitativeTraitOnlyInts",
Arrays.asList(
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0),
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0),
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
String.format("%s%n%s",
"fam1 s1 0 0 1 1",
"fam2 s2 0 0 2 10"));
@ -189,7 +175,7 @@ public class PedReaderUnitTest extends BaseTest {
return PedReaderTest.getTests(PedReaderTest.class);
}
private static final void runTest(PedReaderTest test, String myFileContents, EnumSet<PedReader.MissingPedFields> missing) {
private static final void runTest(PedReaderTest test, String myFileContents, EnumSet<PedReader.MissingPedField> missing) {
logger.warn("Test " + test);
PedReader reader = new PedReader();
SampleDataSource sampleDB = new SampleDataSource();
@ -199,37 +185,91 @@ public class PedReaderUnitTest extends BaseTest {
@Test(enabled = true, dataProvider = "readerTest")
public void testPedReader(PedReaderTest test) {
runTest(test, test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class));
runTest(test, test.fileContents, EnumSet.noneOf(PedReader.MissingPedField.class));
}
@Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader")
public void testPedReaderWithComments(PedReaderTest test) {
runTest(test, String.format("#comment%n%s", test.fileContents), EnumSet.noneOf(PedReader.MissingPedFields.class));
runTest(test, String.format("#comment%n%s", test.fileContents), EnumSet.noneOf(PedReader.MissingPedField.class));
}
// -----------------------------------------------------------------
// missing format field tests
// -----------------------------------------------------------------
/**
 * One test case for parsing a PED line from which some columns have been removed.
 *
 * Holds the full (unsliced) record text, the reader-level description of what is
 * missing, the concrete columns to strip before parsing, and the sample the
 * reader is expected to produce.
 */
private class PedReaderTestMissing extends TestDataProvider {
    /** The complete record text, before any columns are removed. */
    public final String fileContents;

    /** Missing-field flags handed to the reader. */
    public EnumSet<PedReader.MissingPedField> missingDesc;

    /** Columns to strip from {@link #fileContents} before parsing. */
    public EnumSet<PedReader.Field> missingFields;

    /** The sample expected back from the reader. */
    public Sample expected;

    private PedReaderTestMissing(
            final String name,
            final String fileContents,
            EnumSet<PedReader.MissingPedField> missingDesc,
            EnumSet<PedReader.Field> missingFields,
            final Sample expected) {
        // Register under this class so getTests(PedReaderTestMissing.class) finds it.
        super(PedReaderTestMissing.class, name);

        this.expected = expected;
        this.missingFields = missingFields;
        this.missingDesc = missingDesc;
        this.fileContents = fileContents;
    }
}
@DataProvider(name = "readerTestMissing")
public Object[][] createPEDFilesWithMissing() {
new PedReaderTestMissing("trioMissingFam", EnumSet.of(PedReader.MissingPedFields.NO_FAMILY_ID),
Arrays.asList(
new Sample("kid", null, "dad", "mom", Gender.MALE, Affection.AFFECTED),
new Sample("dad", null, null, null, Gender.MALE, Affection.UNAFFECTED),
new Sample("mom", null, null, null, Gender.FEMALE, Affection.AFFECTED)),
String.format("%s%n%s%n%s",
"kid dad mom 1 2",
"dad 0 0 1 1",
"mom 0 0 2 2"));
new PedReaderTestMissing("missingFam",
"fam1 kid dad mom 1 2",
EnumSet.of(PedReader.MissingPedField.NO_FAMILY_ID),
EnumSet.of(PedReader.Field.FAMILY_ID),
new Sample("kid", null, "dad", "mom", Gender.MALE, Affection.AFFECTED));
new PedReaderTestMissing("missingParents",
"fam1 kid dad mom 1 2",
EnumSet.of(PedReader.MissingPedField.NO_PARENTS),
EnumSet.of(PedReader.Field.PATERNAL_ID, PedReader.Field.MATERNAL_ID),
new Sample("kid", "fam1", null, null, Gender.MALE, Affection.AFFECTED));
new PedReaderTestMissing("missingSex",
"fam1 kid dad mom 1 2",
EnumSet.of(PedReader.MissingPedField.NO_SEX),
EnumSet.of(PedReader.Field.GENDER),
new Sample("kid", "fam1", "dad", "mom", Gender.UNKNOWN, Affection.AFFECTED));
new PedReaderTestMissing("missingPhenotype",
"fam1 kid dad mom 1 2",
EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE),
EnumSet.of(PedReader.Field.PHENOTYPE),
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.UNKNOWN));
new PedReaderTestMissing("missingEverythingButGender",
"fam1 kid dad mom 1 2",
EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE, PedReader.MissingPedField.NO_PARENTS, PedReader.MissingPedField.NO_FAMILY_ID),
EnumSet.of(PedReader.Field.FAMILY_ID, PedReader.Field.PATERNAL_ID, PedReader.Field.MATERNAL_ID, PedReader.Field.PHENOTYPE),
new Sample("kid", null, null, null, Gender.MALE, Affection.UNKNOWN));
return PedReaderTestMissing.getTests(PedReaderTestMissing.class);
}
@Test(enabled = true, dataProvider = "readerTestMissing", dependsOnMethods = "testPedReader")
public void testPedReaderWithMissing(PedReaderTest test) {
// public enum MissingPedFields {
// NO_FAMILY_ID,
// NO_PARENTS,
// NO_SEX,
// NO_PHENOTYPE
// }
// runTest(test, sliceContents(0, test.fileContents), EnumSet.of(PedReader.MissingPedFields.NO_FAMILY_ID));
public void testPedReaderWithMissing(PedReaderTestMissing test) {
final String contents = sliceContents(test.missingFields, test.fileContents);
logger.warn("Test " + test);
PedReader reader = new PedReader();
SampleDataSource sampleDB = new SampleDataSource();
reader.parse(new StringReader(contents), test.missingDesc, sampleDB);
final Sample missingSample = sampleDB.getSample("kid");
Assert.assertEquals(test.expected, missingSample, "Missing field value not expected value for " + test);
}
/**
 * Builds a record with the given fields removed.
 *
 * The input is split on runs of whitespace, the column at each missing
 * field's ordinal() position is dropped, and the remaining columns are
 * re-joined with tabs.
 *
 * @param missingFieldsSet fields whose columns should be removed
 * @param full             the complete whitespace-separated record
 * @return the tab-joined record without the removed columns
 */
private final static String sliceContents(EnumSet<PedReader.Field> missingFieldsSet, String full) {
    final List<String> columns = new ArrayList<String>(Arrays.asList(full.split("\\s+")));

    // Walk the fields from last to first so that removing one column never
    // shifts the index of a column that still has to be removed.
    final PedReader.Field[] toDrop = missingFieldsSet.toArray(new PedReader.Field[0]);
    for ( int i = toDrop.length - 1; i >= 0; i-- ) {
        final int columnIndex = toDrop[i].ordinal();
        columns.remove(columnIndex);
    }

    return Utils.join("\t", columns);
}
}