100% version of PedReader

-- Passes all unit tests
-- Added unit tests for missing fields
This commit is contained in:
Mark DePristo 2011-10-03 06:12:58 -07:00
parent dd75ad9f49
commit 52f670c8b8
2 changed files with 114 additions and 70 deletions

View File

@ -117,13 +117,17 @@ public class PedReader {
final static private Set<String> CATAGORICAL_TRAIT_VALUES = new HashSet<String>(Arrays.asList("-9", "0", "1", "2"));
final static private String commentMarker = "#";
public enum MissingPedFields {
/**
 * Flags a caller passes to parse() to declare which columns are absent from
 * the PED records being read. parse() assigns position -1 to every column
 * covered by a flag that is present and packs the remaining columns together.
 */
public enum MissingPedField {
// the family id column is absent; the individual id becomes the first column
NO_FAMILY_ID,
// both the paternal id and the maternal id columns are absent
NO_PARENTS,
// the sex column is absent
NO_SEX,
// the phenotype column is absent
NO_PHENOTYPE
}
// Names the individual columns of a PED record.
// NOTE(review): the declaration order appears to mirror the column order of a
// complete record -- PedReaderUnitTest removes columns by ordinal() -- so do
// not reorder these constants without updating that test.
protected enum Field {
FAMILY_ID, INDIVIDUAL_ID, PATERNAL_ID, MATERNAL_ID, GENDER, PHENOTYPE
}
// phenotype
private final static String MISSING_VALUE1 = "-9";
private final static String MISSING_VALUE2 = "0";
@ -137,21 +141,21 @@ public class PedReader {
public PedReader() { }
public final List<Sample> parse(File source, EnumSet<MissingPedFields> missingFields, SampleDataSource sampleDB) throws FileNotFoundException {
public final List<Sample> parse(File source, EnumSet<MissingPedField> missingFields, SampleDataSource sampleDB) throws FileNotFoundException {
logger.info("Reading PED file " + source + " with missing fields: " + missingFields);
return parse(new FileReader(source), missingFields, sampleDB);
}
public final List<Sample> parse(Reader reader, EnumSet<MissingPedFields> missingFields, SampleDataSource sampleDB) {
public final List<Sample> parse(Reader reader, EnumSet<MissingPedField> missingFields, SampleDataSource sampleDB) {
final List<String> lines = new XReadLines(reader).readLines();
// What are the record offsets?
final int familyPos = missingFields.contains(MissingPedFields.NO_FAMILY_ID) ? -1 : 0;
final int familyPos = missingFields.contains(MissingPedField.NO_FAMILY_ID) ? -1 : 0;
final int samplePos = familyPos + 1;
final int paternalPos = missingFields.contains(MissingPedFields.NO_PARENTS) ? -1 : samplePos + 1;
final int maternalPos = missingFields.contains(MissingPedFields.NO_PARENTS) ? -1 : paternalPos + 1;
final int sexPos = missingFields.contains(MissingPedFields.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1;
final int phenotypePos = missingFields.contains(MissingPedFields.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1;
final int paternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : samplePos + 1;
final int maternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : paternalPos + 1;
final int sexPos = missingFields.contains(MissingPedField.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1;
final int phenotypePos = missingFields.contains(MissingPedField.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1;
final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)) + 1;
// go through once and determine properties

View File

@ -26,15 +26,14 @@ package org.broadinstitute.sting.gatk.samples;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.StringReader;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.lang.reflect.Array;
import java.util.*;
/**
* UnitTest for PedReader
@ -48,7 +47,7 @@ public class PedReaderUnitTest extends BaseTest {
private class PedReaderTest extends TestDataProvider {
public String fileContents;
public List<Sample> expectedSamples;
EnumSet<PedReader.MissingPedFields> missing;
EnumSet<PedReader.MissingPedField> missing;
private PedReaderTest(final String name, final List<Sample> expectedSamples, final String fileContents) {
super(PedReaderTest.class, name);
@ -57,19 +56,6 @@ public class PedReaderUnitTest extends BaseTest {
}
}
private class PedReaderTestMissing extends TestDataProvider {
public String fileContents;
public List<Sample> expectedSamples;
EnumSet<PedReader.MissingPedFields> missing;
private PedReaderTestMissing(final String name, EnumSet<PedReader.MissingPedFields> missing, final List<Sample> expectedSamples, final String fileContents) {
super(PedReaderTest.class, name);
this.fileContents = fileContents;
this.expectedSamples = expectedSamples;
this.missing = missing;
}
}
// Family ID
// Individual ID
// Paternal ID
@ -189,7 +175,7 @@ public class PedReaderUnitTest extends BaseTest {
return PedReaderTest.getTests(PedReaderTest.class);
}
private static final void runTest(PedReaderTest test, String myFileContents, EnumSet<PedReader.MissingPedFields> missing) {
private static final void runTest(PedReaderTest test, String myFileContents, EnumSet<PedReader.MissingPedField> missing) {
logger.warn("Test " + test);
PedReader reader = new PedReader();
SampleDataSource sampleDB = new SampleDataSource();
@ -199,37 +185,91 @@ public class PedReaderUnitTest extends BaseTest {
@Test(enabled = true, dataProvider = "readerTest")
public void testPedReader(PedReaderTest test) {
runTest(test, test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class));
runTest(test, test.fileContents, EnumSet.noneOf(PedReader.MissingPedField.class));
}
@Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader")
public void testPedReaderWithComments(PedReaderTest test) {
runTest(test, String.format("#comment%n%s", test.fileContents), EnumSet.noneOf(PedReader.MissingPedFields.class));
runTest(test, String.format("#comment%n%s", test.fileContents), EnumSet.noneOf(PedReader.MissingPedField.class));
}
// -----------------------------------------------------------------
// missing format field tests
// -----------------------------------------------------------------
/**
 * One scenario for the missing-field tests: a single complete PED record plus
 * a description of which columns should be treated as absent and the sample
 * the reader is expected to produce once they are gone.
 */
private class PedReaderTestMissing extends TestDataProvider {
    /** The complete record; the test strips columns from it before parsing. */
    public final String fileContents;

    /** Flags handed to PedReader.parse() describing the absent columns. */
    public EnumSet<PedReader.MissingPedField> missingDesc;

    /** The concrete columns the test removes from {@link #fileContents}. */
    public EnumSet<PedReader.Field> missingFields;

    /** The sample the test compares against the one read back from the sample DB. */
    public Sample expected;

    private PedReaderTestMissing(final String name,
                                 final String fileContents,
                                 EnumSet<PedReader.MissingPedField> missingDesc,
                                 EnumSet<PedReader.Field> missingFields,
                                 final Sample expected) {
        // register this scenario under its own provider class
        super(PedReaderTestMissing.class, name);

        this.expected = expected;
        this.missingFields = missingFields;
        this.missingDesc = missingDesc;
        this.fileContents = fileContents;
    }
}
@DataProvider(name = "readerTestMissing")
public Object[][] createPEDFilesWithMissing() {
new PedReaderTestMissing("trioMissingFam", EnumSet.of(PedReader.MissingPedFields.NO_FAMILY_ID),
Arrays.asList(
new Sample("kid", null, "dad", "mom", Gender.MALE, Affection.AFFECTED),
new Sample("dad", null, null, null, Gender.MALE, Affection.UNAFFECTED),
new Sample("mom", null, null, null, Gender.FEMALE, Affection.AFFECTED)),
String.format("%s%n%s%n%s",
"kid dad mom 1 2",
"dad 0 0 1 1",
"mom 0 0 2 2"));
new PedReaderTestMissing("missingFam",
"fam1 kid dad mom 1 2",
EnumSet.of(PedReader.MissingPedField.NO_FAMILY_ID),
EnumSet.of(PedReader.Field.FAMILY_ID),
new Sample("kid", null, "dad", "mom", Gender.MALE, Affection.AFFECTED));
new PedReaderTestMissing("missingParents",
"fam1 kid dad mom 1 2",
EnumSet.of(PedReader.MissingPedField.NO_PARENTS),
EnumSet.of(PedReader.Field.PATERNAL_ID, PedReader.Field.MATERNAL_ID),
new Sample("kid", "fam1", null, null, Gender.MALE, Affection.AFFECTED));
new PedReaderTestMissing("missingSex",
"fam1 kid dad mom 1 2",
EnumSet.of(PedReader.MissingPedField.NO_SEX),
EnumSet.of(PedReader.Field.GENDER),
new Sample("kid", "fam1", "dad", "mom", Gender.UNKNOWN, Affection.AFFECTED));
new PedReaderTestMissing("missingPhenotype",
"fam1 kid dad mom 1 2",
EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE),
EnumSet.of(PedReader.Field.PHENOTYPE),
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.UNKNOWN));
new PedReaderTestMissing("missingEverythingButGender",
"fam1 kid dad mom 1 2",
EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE, PedReader.MissingPedField.NO_PARENTS, PedReader.MissingPedField.NO_FAMILY_ID),
EnumSet.of(PedReader.Field.FAMILY_ID, PedReader.Field.PATERNAL_ID, PedReader.Field.MATERNAL_ID, PedReader.Field.PHENOTYPE),
new Sample("kid", null, null, null, Gender.MALE, Affection.UNKNOWN));
return PedReaderTestMissing.getTests(PedReaderTestMissing.class);
}
@Test(enabled = true, dataProvider = "readerTestMissing", dependsOnMethods = "testPedReader")
public void testPedReaderWithMissing(PedReaderTest test) {
// public enum MissingPedFields {
// NO_FAMILY_ID,
// NO_PARENTS,
// NO_SEX,
// NO_PHENOTYPE
// }
// runTest(test, sliceContents(0, test.fileContents), EnumSet.of(PedReader.MissingPedFields.NO_FAMILY_ID));
// Parses a PED record from which the scenario's columns have been removed and
// checks that the sample stored under the id "kid" equals the scenario's
// expected sample.
public void testPedReaderWithMissing(PedReaderTestMissing test) {
    // Drop the columns this scenario declares as missing from the full record.
    final String contents = sliceContents(test.missingFields, test.fileContents);
    logger.warn("Test " + test);

    final PedReader reader = new PedReader();
    final SampleDataSource sampleDB = new SampleDataSource();
    // The returned list is not needed; the sample is read back from sampleDB below.
    reader.parse(new StringReader(contents), test.missingDesc, sampleDB);

    final Sample missingSample = sampleDB.getSample("kid");
    // TestNG's signature is assertEquals(actual, expected, message); passing the
    // expected value first makes failure reports print the two values swapped.
    Assert.assertEquals(missingSample, test.expected, "Missing field value not expected value for " + test);
}
/**
 * Builds a tab-separated record from a whitespace-separated one with the
 * columns named in missingFieldsSet removed.
 *
 * Each field's ordinal() is used as its column index in the full record.
 * Columns are removed from the highest index down so that removing one does
 * not shift the index of any column still waiting to be removed.
 *
 * @param missingFieldsSet the columns to drop
 * @param full             the complete, whitespace-separated record
 * @return the surviving columns joined with tab characters
 */
private final static String sliceContents(EnumSet<PedReader.Field> missingFieldsSet, String full) {
    final List<String> columns = new ArrayList<String>(Arrays.asList(full.split("\\s+")));

    // EnumSet hands its elements back in declaration order; walk them backwards
    final PedReader.Field[] dropped = missingFieldsSet.toArray(new PedReader.Field[missingFieldsSet.size()]);
    for ( int i = dropped.length - 1; i >= 0; i-- ) {
        final int column = dropped[i].ordinal();
        columns.remove(column);
    }

    return Utils.join("\t", columns);
}
}