100% version of PedReader

-- Passes all unit tests
-- Added unit tests for missing fields
This commit is contained in:
Mark DePristo 2011-10-03 06:12:58 -07:00
parent dd75ad9f49
commit 52f670c8b8
2 changed files with 114 additions and 70 deletions

View File

@ -117,13 +117,17 @@ public class PedReader {
final static private Set<String> CATAGORICAL_TRAIT_VALUES = new HashSet<String>(Arrays.asList("-9", "0", "1", "2")); final static private Set<String> CATAGORICAL_TRAIT_VALUES = new HashSet<String>(Arrays.asList("-9", "0", "1", "2"));
final static private String commentMarker = "#"; final static private String commentMarker = "#";
public enum MissingPedFields { public enum MissingPedField {
NO_FAMILY_ID, NO_FAMILY_ID,
NO_PARENTS, NO_PARENTS,
NO_SEX, NO_SEX,
NO_PHENOTYPE NO_PHENOTYPE
} }
/**
 * Columns of a PED record, declared in the order in which they appear on a
 * fully populated line (compare the test line "fam1 kid dad mom 1 2" used by
 * PedReaderUnitTest).
 *
 * NOTE: the unit test helper sliceContents uses each constant's ordinal() as
 * a column index, so the declaration order here must not be changed.
 */
protected enum Field {
FAMILY_ID, INDIVIDUAL_ID, PATERNAL_ID, MATERNAL_ID, GENDER, PHENOTYPE
}
// phenotype // phenotype
private final static String MISSING_VALUE1 = "-9"; private final static String MISSING_VALUE1 = "-9";
private final static String MISSING_VALUE2 = "0"; private final static String MISSING_VALUE2 = "0";
@ -137,21 +141,21 @@ public class PedReader {
public PedReader() { } public PedReader() { }
public final List<Sample> parse(File source, EnumSet<MissingPedFields> missingFields, SampleDataSource sampleDB) throws FileNotFoundException { public final List<Sample> parse(File source, EnumSet<MissingPedField> missingFields, SampleDataSource sampleDB) throws FileNotFoundException {
logger.info("Reading PED file " + source + " with missing fields: " + missingFields); logger.info("Reading PED file " + source + " with missing fields: " + missingFields);
return parse(new FileReader(source), missingFields, sampleDB); return parse(new FileReader(source), missingFields, sampleDB);
} }
public final List<Sample> parse(Reader reader, EnumSet<MissingPedFields> missingFields, SampleDataSource sampleDB) { public final List<Sample> parse(Reader reader, EnumSet<MissingPedField> missingFields, SampleDataSource sampleDB) {
final List<String> lines = new XReadLines(reader).readLines(); final List<String> lines = new XReadLines(reader).readLines();
// What are the record offsets? // What are the record offsets?
final int familyPos = missingFields.contains(MissingPedFields.NO_FAMILY_ID) ? -1 : 0; final int familyPos = missingFields.contains(MissingPedField.NO_FAMILY_ID) ? -1 : 0;
final int samplePos = familyPos + 1; final int samplePos = familyPos + 1;
final int paternalPos = missingFields.contains(MissingPedFields.NO_PARENTS) ? -1 : samplePos + 1; final int paternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : samplePos + 1;
final int maternalPos = missingFields.contains(MissingPedFields.NO_PARENTS) ? -1 : paternalPos + 1; final int maternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : paternalPos + 1;
final int sexPos = missingFields.contains(MissingPedFields.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1; final int sexPos = missingFields.contains(MissingPedField.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1;
final int phenotypePos = missingFields.contains(MissingPedFields.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1; final int phenotypePos = missingFields.contains(MissingPedField.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1;
final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)) + 1; final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)) + 1;
// go through once and determine properties // go through once and determine properties

View File

@ -26,15 +26,14 @@ package org.broadinstitute.sting.gatk.samples;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.DataProvider; import org.testng.annotations.DataProvider;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.io.StringReader; import java.io.StringReader;
import java.util.Arrays; import java.lang.reflect.Array;
import java.util.EnumSet; import java.util.*;
import java.util.HashSet;
import java.util.List;
/** /**
* UnitTest for PedReader * UnitTest for PedReader
@ -48,7 +47,7 @@ public class PedReaderUnitTest extends BaseTest {
private class PedReaderTest extends TestDataProvider { private class PedReaderTest extends TestDataProvider {
public String fileContents; public String fileContents;
public List<Sample> expectedSamples; public List<Sample> expectedSamples;
EnumSet<PedReader.MissingPedFields> missing; EnumSet<PedReader.MissingPedField> missing;
private PedReaderTest(final String name, final List<Sample> expectedSamples, final String fileContents) { private PedReaderTest(final String name, final List<Sample> expectedSamples, final String fileContents) {
super(PedReaderTest.class, name); super(PedReaderTest.class, name);
@ -57,19 +56,6 @@ public class PedReaderUnitTest extends BaseTest {
} }
} }
private class PedReaderTestMissing extends TestDataProvider {
public String fileContents;
public List<Sample> expectedSamples;
EnumSet<PedReader.MissingPedFields> missing;
private PedReaderTestMissing(final String name, EnumSet<PedReader.MissingPedFields> missing, final List<Sample> expectedSamples, final String fileContents) {
super(PedReaderTest.class, name);
this.fileContents = fileContents;
this.expectedSamples = expectedSamples;
this.missing = missing;
}
}
// Family ID // Family ID
// Individual ID // Individual ID
// Paternal ID // Paternal ID
@ -189,7 +175,7 @@ public class PedReaderUnitTest extends BaseTest {
return PedReaderTest.getTests(PedReaderTest.class); return PedReaderTest.getTests(PedReaderTest.class);
} }
private static final void runTest(PedReaderTest test, String myFileContents, EnumSet<PedReader.MissingPedFields> missing) { private static final void runTest(PedReaderTest test, String myFileContents, EnumSet<PedReader.MissingPedField> missing) {
logger.warn("Test " + test); logger.warn("Test " + test);
PedReader reader = new PedReader(); PedReader reader = new PedReader();
SampleDataSource sampleDB = new SampleDataSource(); SampleDataSource sampleDB = new SampleDataSource();
@ -199,37 +185,91 @@ public class PedReaderUnitTest extends BaseTest {
@Test(enabled = true, dataProvider = "readerTest") @Test(enabled = true, dataProvider = "readerTest")
public void testPedReader(PedReaderTest test) { public void testPedReader(PedReaderTest test) {
runTest(test, test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class)); runTest(test, test.fileContents, EnumSet.noneOf(PedReader.MissingPedField.class));
} }
@Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader") @Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader")
public void testPedReaderWithComments(PedReaderTest test) { public void testPedReaderWithComments(PedReaderTest test) {
runTest(test, String.format("#comment%n%s", test.fileContents), EnumSet.noneOf(PedReader.MissingPedFields.class)); runTest(test, String.format("#comment%n%s", test.fileContents), EnumSet.noneOf(PedReader.MissingPedField.class));
}
// -----------------------------------------------------------------
// missing format field tests
// -----------------------------------------------------------------
/**
 * One test case for parsing a PED line from which some columns have been
 * left out. Each instance is registered under PedReaderTestMissing.class via
 * the TestDataProvider super constructor, and is later retrieved with
 * getTests(PedReaderTestMissing.class) by the "readerTestMissing" data provider.
 */
private class PedReaderTestMissing extends TestDataProvider {
// Missing-field flags handed to PedReader.parse() for this case.
public EnumSet<PedReader.MissingPedField> missingDesc;
// Columns that testPedReaderWithMissing strips from fileContents (via sliceContents) before parsing.
public EnumSet<PedReader.Field> missingFields;
// The complete PED line, with every column still present.
public final String fileContents;
// The Sample the test asserts is stored under the id "kid" after parsing.
public Sample expected;
/**
 * @param name          name of the test case, passed to TestDataProvider
 * @param fileContents  the complete PED line, before any column is removed
 * @param missingDesc   missing-field flags to pass to the reader
 * @param missingFields columns to remove from fileContents before parsing
 * @param expected      the sample expected to result from parsing
 */
private PedReaderTestMissing(final String name, final String fileContents,
EnumSet<PedReader.MissingPedField> missingDesc,
EnumSet<PedReader.Field> missingFields,
final Sample expected) {
super(PedReaderTestMissing.class, name);
this.fileContents = fileContents;
this.missingDesc = missingDesc;
this.missingFields = missingFields;
this.expected = expected;
}
} }
@DataProvider(name = "readerTestMissing") @DataProvider(name = "readerTestMissing")
public Object[][] createPEDFilesWithMissing() { public Object[][] createPEDFilesWithMissing() {
new PedReaderTestMissing("trioMissingFam", EnumSet.of(PedReader.MissingPedFields.NO_FAMILY_ID),
Arrays.asList( new PedReaderTestMissing("missingFam",
new Sample("kid", null, "dad", "mom", Gender.MALE, Affection.AFFECTED), "fam1 kid dad mom 1 2",
new Sample("dad", null, null, null, Gender.MALE, Affection.UNAFFECTED), EnumSet.of(PedReader.MissingPedField.NO_FAMILY_ID),
new Sample("mom", null, null, null, Gender.FEMALE, Affection.AFFECTED)), EnumSet.of(PedReader.Field.FAMILY_ID),
String.format("%s%n%s%n%s", new Sample("kid", null, "dad", "mom", Gender.MALE, Affection.AFFECTED));
"kid dad mom 1 2",
"dad 0 0 1 1", new PedReaderTestMissing("missingParents",
"mom 0 0 2 2")); "fam1 kid dad mom 1 2",
EnumSet.of(PedReader.MissingPedField.NO_PARENTS),
EnumSet.of(PedReader.Field.PATERNAL_ID, PedReader.Field.MATERNAL_ID),
new Sample("kid", "fam1", null, null, Gender.MALE, Affection.AFFECTED));
new PedReaderTestMissing("missingSex",
"fam1 kid dad mom 1 2",
EnumSet.of(PedReader.MissingPedField.NO_SEX),
EnumSet.of(PedReader.Field.GENDER),
new Sample("kid", "fam1", "dad", "mom", Gender.UNKNOWN, Affection.AFFECTED));
new PedReaderTestMissing("missingPhenotype",
"fam1 kid dad mom 1 2",
EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE),
EnumSet.of(PedReader.Field.PHENOTYPE),
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.UNKNOWN));
new PedReaderTestMissing("missingEverythingButGender",
"fam1 kid dad mom 1 2",
EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE, PedReader.MissingPedField.NO_PARENTS, PedReader.MissingPedField.NO_FAMILY_ID),
EnumSet.of(PedReader.Field.FAMILY_ID, PedReader.Field.PATERNAL_ID, PedReader.Field.MATERNAL_ID, PedReader.Field.PHENOTYPE),
new Sample("kid", null, null, null, Gender.MALE, Affection.UNKNOWN));
return PedReaderTestMissing.getTests(PedReaderTestMissing.class); return PedReaderTestMissing.getTests(PedReaderTestMissing.class);
} }
@Test(enabled = true, dataProvider = "readerTestMissing", dependsOnMethods = "testPedReader") @Test(enabled = true, dataProvider = "readerTestMissing", dependsOnMethods = "testPedReader")
public void testPedReaderWithMissing(PedReaderTest test) { public void testPedReaderWithMissing(PedReaderTestMissing test) {
// public enum MissingPedFields { final String contents = sliceContents(test.missingFields, test.fileContents);
// NO_FAMILY_ID, logger.warn("Test " + test);
// NO_PARENTS, PedReader reader = new PedReader();
// NO_SEX, SampleDataSource sampleDB = new SampleDataSource();
// NO_PHENOTYPE reader.parse(new StringReader(contents), test.missingDesc, sampleDB);
// } final Sample missingSample = sampleDB.getSample("kid");
// runTest(test, sliceContents(0, test.fileContents), EnumSet.of(PedReader.MissingPedFields.NO_FAMILY_ID)); Assert.assertEquals(test.expected, missingSample, "Missing field value not expected value for " + test);
}
/**
 * Builds the text of a PED line with the given columns removed.
 *
 * The input is split on runs of whitespace, the column at each field's
 * ordinal() position is dropped, and the remaining columns are handed to
 * Utils.join with a tab as the separator argument.
 *
 * @param missingFieldsSet the columns to drop from the line
 * @param full             a complete, whitespace-separated PED line
 * @return the result of Utils.join("\t", ...) over the remaining columns
 */
private final static String sliceContents(EnumSet<PedReader.Field> missingFieldsSet, String full) {
// Copy into an ArrayList: Arrays.asList alone is fixed-size and does not support remove().
List<String> parts = new ArrayList<String>(Arrays.asList(full.split("\\s+")));
final List<PedReader.Field> missingFields = new ArrayList<PedReader.Field>(missingFieldsSet);
// EnumSet iterates in declaration order; reversing removes the highest column
// first, so the indices of the columns still to be removed do not shift.
Collections.reverse(missingFields);
for ( PedReader.Field field : missingFields )
// ordinal() is an int, so this resolves to List.remove(int index), not remove(Object).
parts.remove(field.ordinal());
return Utils.join("\t", parts);
} }
} }