100% version of PedReader
-- Passes all unit tests -- Added unit tests for missing fields
This commit is contained in:
parent
dd75ad9f49
commit
52f670c8b8
|
|
@ -117,13 +117,17 @@ public class PedReader {
|
||||||
final static private Set<String> CATAGORICAL_TRAIT_VALUES = new HashSet<String>(Arrays.asList("-9", "0", "1", "2"));
|
final static private Set<String> CATAGORICAL_TRAIT_VALUES = new HashSet<String>(Arrays.asList("-9", "0", "1", "2"));
|
||||||
final static private String commentMarker = "#";
|
final static private String commentMarker = "#";
|
||||||
|
|
||||||
public enum MissingPedFields {
|
public enum MissingPedField {
|
||||||
NO_FAMILY_ID,
|
NO_FAMILY_ID,
|
||||||
NO_PARENTS,
|
NO_PARENTS,
|
||||||
NO_SEX,
|
NO_SEX,
|
||||||
NO_PHENOTYPE
|
NO_PHENOTYPE
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected enum Field {
|
||||||
|
FAMILY_ID, INDIVIDUAL_ID, PATERNAL_ID, MATERNAL_ID, GENDER, PHENOTYPE
|
||||||
|
}
|
||||||
|
|
||||||
// phenotype
|
// phenotype
|
||||||
private final static String MISSING_VALUE1 = "-9";
|
private final static String MISSING_VALUE1 = "-9";
|
||||||
private final static String MISSING_VALUE2 = "0";
|
private final static String MISSING_VALUE2 = "0";
|
||||||
|
|
@ -137,21 +141,21 @@ public class PedReader {
|
||||||
|
|
||||||
public PedReader() { }
|
public PedReader() { }
|
||||||
|
|
||||||
public final List<Sample> parse(File source, EnumSet<MissingPedFields> missingFields, SampleDataSource sampleDB) throws FileNotFoundException {
|
public final List<Sample> parse(File source, EnumSet<MissingPedField> missingFields, SampleDataSource sampleDB) throws FileNotFoundException {
|
||||||
logger.info("Reading PED file " + source + " with missing fields: " + missingFields);
|
logger.info("Reading PED file " + source + " with missing fields: " + missingFields);
|
||||||
return parse(new FileReader(source), missingFields, sampleDB);
|
return parse(new FileReader(source), missingFields, sampleDB);
|
||||||
}
|
}
|
||||||
|
|
||||||
public final List<Sample> parse(Reader reader, EnumSet<MissingPedFields> missingFields, SampleDataSource sampleDB) {
|
public final List<Sample> parse(Reader reader, EnumSet<MissingPedField> missingFields, SampleDataSource sampleDB) {
|
||||||
final List<String> lines = new XReadLines(reader).readLines();
|
final List<String> lines = new XReadLines(reader).readLines();
|
||||||
|
|
||||||
// What are the record offsets?
|
// What are the record offsets?
|
||||||
final int familyPos = missingFields.contains(MissingPedFields.NO_FAMILY_ID) ? -1 : 0;
|
final int familyPos = missingFields.contains(MissingPedField.NO_FAMILY_ID) ? -1 : 0;
|
||||||
final int samplePos = familyPos + 1;
|
final int samplePos = familyPos + 1;
|
||||||
final int paternalPos = missingFields.contains(MissingPedFields.NO_PARENTS) ? -1 : samplePos + 1;
|
final int paternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : samplePos + 1;
|
||||||
final int maternalPos = missingFields.contains(MissingPedFields.NO_PARENTS) ? -1 : paternalPos + 1;
|
final int maternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : paternalPos + 1;
|
||||||
final int sexPos = missingFields.contains(MissingPedFields.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1;
|
final int sexPos = missingFields.contains(MissingPedField.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1;
|
||||||
final int phenotypePos = missingFields.contains(MissingPedFields.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1;
|
final int phenotypePos = missingFields.contains(MissingPedField.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1;
|
||||||
final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)) + 1;
|
final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)) + 1;
|
||||||
|
|
||||||
// go through once and determine properties
|
// go through once and determine properties
|
||||||
|
|
|
||||||
|
|
@ -26,15 +26,14 @@ package org.broadinstitute.sting.gatk.samples;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.DataProvider;
|
import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.util.Arrays;
|
import java.lang.reflect.Array;
|
||||||
import java.util.EnumSet;
|
import java.util.*;
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* UnitTest for PedReader
|
* UnitTest for PedReader
|
||||||
|
|
@ -48,7 +47,7 @@ public class PedReaderUnitTest extends BaseTest {
|
||||||
private class PedReaderTest extends TestDataProvider {
|
private class PedReaderTest extends TestDataProvider {
|
||||||
public String fileContents;
|
public String fileContents;
|
||||||
public List<Sample> expectedSamples;
|
public List<Sample> expectedSamples;
|
||||||
EnumSet<PedReader.MissingPedFields> missing;
|
EnumSet<PedReader.MissingPedField> missing;
|
||||||
|
|
||||||
private PedReaderTest(final String name, final List<Sample> expectedSamples, final String fileContents) {
|
private PedReaderTest(final String name, final List<Sample> expectedSamples, final String fileContents) {
|
||||||
super(PedReaderTest.class, name);
|
super(PedReaderTest.class, name);
|
||||||
|
|
@ -57,19 +56,6 @@ public class PedReaderUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class PedReaderTestMissing extends TestDataProvider {
|
|
||||||
public String fileContents;
|
|
||||||
public List<Sample> expectedSamples;
|
|
||||||
EnumSet<PedReader.MissingPedFields> missing;
|
|
||||||
|
|
||||||
private PedReaderTestMissing(final String name, EnumSet<PedReader.MissingPedFields> missing, final List<Sample> expectedSamples, final String fileContents) {
|
|
||||||
super(PedReaderTest.class, name);
|
|
||||||
this.fileContents = fileContents;
|
|
||||||
this.expectedSamples = expectedSamples;
|
|
||||||
this.missing = missing;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Family ID
|
// Family ID
|
||||||
// Individual ID
|
// Individual ID
|
||||||
// Paternal ID
|
// Paternal ID
|
||||||
|
|
@ -189,7 +175,7 @@ public class PedReaderUnitTest extends BaseTest {
|
||||||
return PedReaderTest.getTests(PedReaderTest.class);
|
return PedReaderTest.getTests(PedReaderTest.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final void runTest(PedReaderTest test, String myFileContents, EnumSet<PedReader.MissingPedFields> missing) {
|
private static final void runTest(PedReaderTest test, String myFileContents, EnumSet<PedReader.MissingPedField> missing) {
|
||||||
logger.warn("Test " + test);
|
logger.warn("Test " + test);
|
||||||
PedReader reader = new PedReader();
|
PedReader reader = new PedReader();
|
||||||
SampleDataSource sampleDB = new SampleDataSource();
|
SampleDataSource sampleDB = new SampleDataSource();
|
||||||
|
|
@ -199,37 +185,91 @@ public class PedReaderUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test(enabled = true, dataProvider = "readerTest")
|
@Test(enabled = true, dataProvider = "readerTest")
|
||||||
public void testPedReader(PedReaderTest test) {
|
public void testPedReader(PedReaderTest test) {
|
||||||
runTest(test, test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class));
|
runTest(test, test.fileContents, EnumSet.noneOf(PedReader.MissingPedField.class));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader")
|
@Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader")
|
||||||
public void testPedReaderWithComments(PedReaderTest test) {
|
public void testPedReaderWithComments(PedReaderTest test) {
|
||||||
runTest(test, String.format("#comment%n%s", test.fileContents), EnumSet.noneOf(PedReader.MissingPedFields.class));
|
runTest(test, String.format("#comment%n%s", test.fileContents), EnumSet.noneOf(PedReader.MissingPedField.class));
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------
|
||||||
|
// missing format field tests
|
||||||
|
// -----------------------------------------------------------------
|
||||||
|
|
||||||
|
private class PedReaderTestMissing extends TestDataProvider {
|
||||||
|
public EnumSet<PedReader.MissingPedField> missingDesc;
|
||||||
|
public EnumSet<PedReader.Field> missingFields;
|
||||||
|
public final String fileContents;
|
||||||
|
public Sample expected;
|
||||||
|
|
||||||
|
|
||||||
|
private PedReaderTestMissing(final String name, final String fileContents,
|
||||||
|
EnumSet<PedReader.MissingPedField> missingDesc,
|
||||||
|
EnumSet<PedReader.Field> missingFields,
|
||||||
|
final Sample expected) {
|
||||||
|
super(PedReaderTestMissing.class, name);
|
||||||
|
this.fileContents = fileContents;
|
||||||
|
this.missingDesc = missingDesc;
|
||||||
|
this.missingFields = missingFields;
|
||||||
|
this.expected = expected;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@DataProvider(name = "readerTestMissing")
|
@DataProvider(name = "readerTestMissing")
|
||||||
public Object[][] createPEDFilesWithMissing() {
|
public Object[][] createPEDFilesWithMissing() {
|
||||||
new PedReaderTestMissing("trioMissingFam", EnumSet.of(PedReader.MissingPedFields.NO_FAMILY_ID),
|
|
||||||
Arrays.asList(
|
new PedReaderTestMissing("missingFam",
|
||||||
new Sample("kid", null, "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
"fam1 kid dad mom 1 2",
|
||||||
new Sample("dad", null, null, null, Gender.MALE, Affection.UNAFFECTED),
|
EnumSet.of(PedReader.MissingPedField.NO_FAMILY_ID),
|
||||||
new Sample("mom", null, null, null, Gender.FEMALE, Affection.AFFECTED)),
|
EnumSet.of(PedReader.Field.FAMILY_ID),
|
||||||
String.format("%s%n%s%n%s",
|
new Sample("kid", null, "dad", "mom", Gender.MALE, Affection.AFFECTED));
|
||||||
"kid dad mom 1 2",
|
|
||||||
"dad 0 0 1 1",
|
new PedReaderTestMissing("missingParents",
|
||||||
"mom 0 0 2 2"));
|
"fam1 kid dad mom 1 2",
|
||||||
|
EnumSet.of(PedReader.MissingPedField.NO_PARENTS),
|
||||||
|
EnumSet.of(PedReader.Field.PATERNAL_ID, PedReader.Field.MATERNAL_ID),
|
||||||
|
new Sample("kid", "fam1", null, null, Gender.MALE, Affection.AFFECTED));
|
||||||
|
|
||||||
|
new PedReaderTestMissing("missingSex",
|
||||||
|
"fam1 kid dad mom 1 2",
|
||||||
|
EnumSet.of(PedReader.MissingPedField.NO_SEX),
|
||||||
|
EnumSet.of(PedReader.Field.GENDER),
|
||||||
|
new Sample("kid", "fam1", "dad", "mom", Gender.UNKNOWN, Affection.AFFECTED));
|
||||||
|
|
||||||
|
new PedReaderTestMissing("missingPhenotype",
|
||||||
|
"fam1 kid dad mom 1 2",
|
||||||
|
EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE),
|
||||||
|
EnumSet.of(PedReader.Field.PHENOTYPE),
|
||||||
|
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.UNKNOWN));
|
||||||
|
|
||||||
|
new PedReaderTestMissing("missingEverythingButGender",
|
||||||
|
"fam1 kid dad mom 1 2",
|
||||||
|
EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE, PedReader.MissingPedField.NO_PARENTS, PedReader.MissingPedField.NO_FAMILY_ID),
|
||||||
|
EnumSet.of(PedReader.Field.FAMILY_ID, PedReader.Field.PATERNAL_ID, PedReader.Field.MATERNAL_ID, PedReader.Field.PHENOTYPE),
|
||||||
|
new Sample("kid", null, null, null, Gender.MALE, Affection.UNKNOWN));
|
||||||
|
|
||||||
|
|
||||||
return PedReaderTestMissing.getTests(PedReaderTestMissing.class);
|
return PedReaderTestMissing.getTests(PedReaderTestMissing.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true, dataProvider = "readerTestMissing", dependsOnMethods = "testPedReader")
|
@Test(enabled = true, dataProvider = "readerTestMissing", dependsOnMethods = "testPedReader")
|
||||||
public void testPedReaderWithMissing(PedReaderTest test) {
|
public void testPedReaderWithMissing(PedReaderTestMissing test) {
|
||||||
// public enum MissingPedFields {
|
final String contents = sliceContents(test.missingFields, test.fileContents);
|
||||||
// NO_FAMILY_ID,
|
logger.warn("Test " + test);
|
||||||
// NO_PARENTS,
|
PedReader reader = new PedReader();
|
||||||
// NO_SEX,
|
SampleDataSource sampleDB = new SampleDataSource();
|
||||||
// NO_PHENOTYPE
|
reader.parse(new StringReader(contents), test.missingDesc, sampleDB);
|
||||||
// }
|
final Sample missingSample = sampleDB.getSample("kid");
|
||||||
// runTest(test, sliceContents(0, test.fileContents), EnumSet.of(PedReader.MissingPedFields.NO_FAMILY_ID));
|
Assert.assertEquals(test.expected, missingSample, "Missing field value not expected value for " + test);
|
||||||
|
}
|
||||||
|
|
||||||
|
private final static String sliceContents(EnumSet<PedReader.Field> missingFieldsSet, String full) {
|
||||||
|
List<String> parts = new ArrayList<String>(Arrays.asList(full.split("\\s+")));
|
||||||
|
final List<PedReader.Field> missingFields = new ArrayList<PedReader.Field>(missingFieldsSet);
|
||||||
|
Collections.reverse(missingFields);
|
||||||
|
for ( PedReader.Field field : missingFields )
|
||||||
|
parts.remove(field.ordinal());
|
||||||
|
return Utils.join("\t", parts);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue