100% version of PedReader
-- Passes all unit tests -- Added unit tests for missing fields
This commit is contained in:
parent
dd75ad9f49
commit
52f670c8b8
|
|
@ -117,13 +117,17 @@ public class PedReader {
|
|||
final static private Set<String> CATAGORICAL_TRAIT_VALUES = new HashSet<String>(Arrays.asList("-9", "0", "1", "2"));
|
||||
final static private String commentMarker = "#";
|
||||
|
||||
public enum MissingPedFields {
|
||||
public enum MissingPedField {
|
||||
NO_FAMILY_ID,
|
||||
NO_PARENTS,
|
||||
NO_SEX,
|
||||
NO_PHENOTYPE
|
||||
}
|
||||
|
||||
protected enum Field {
|
||||
FAMILY_ID, INDIVIDUAL_ID, PATERNAL_ID, MATERNAL_ID, GENDER, PHENOTYPE
|
||||
}
|
||||
|
||||
// phenotype
|
||||
private final static String MISSING_VALUE1 = "-9";
|
||||
private final static String MISSING_VALUE2 = "0";
|
||||
|
|
@ -137,21 +141,21 @@ public class PedReader {
|
|||
|
||||
public PedReader() { }
|
||||
|
||||
public final List<Sample> parse(File source, EnumSet<MissingPedFields> missingFields, SampleDataSource sampleDB) throws FileNotFoundException {
|
||||
public final List<Sample> parse(File source, EnumSet<MissingPedField> missingFields, SampleDataSource sampleDB) throws FileNotFoundException {
|
||||
logger.info("Reading PED file " + source + " with missing fields: " + missingFields);
|
||||
return parse(new FileReader(source), missingFields, sampleDB);
|
||||
}
|
||||
|
||||
public final List<Sample> parse(Reader reader, EnumSet<MissingPedFields> missingFields, SampleDataSource sampleDB) {
|
||||
public final List<Sample> parse(Reader reader, EnumSet<MissingPedField> missingFields, SampleDataSource sampleDB) {
|
||||
final List<String> lines = new XReadLines(reader).readLines();
|
||||
|
||||
// What are the record offsets?
|
||||
final int familyPos = missingFields.contains(MissingPedFields.NO_FAMILY_ID) ? -1 : 0;
|
||||
final int familyPos = missingFields.contains(MissingPedField.NO_FAMILY_ID) ? -1 : 0;
|
||||
final int samplePos = familyPos + 1;
|
||||
final int paternalPos = missingFields.contains(MissingPedFields.NO_PARENTS) ? -1 : samplePos + 1;
|
||||
final int maternalPos = missingFields.contains(MissingPedFields.NO_PARENTS) ? -1 : paternalPos + 1;
|
||||
final int sexPos = missingFields.contains(MissingPedFields.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1;
|
||||
final int phenotypePos = missingFields.contains(MissingPedFields.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1;
|
||||
final int paternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : samplePos + 1;
|
||||
final int maternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : paternalPos + 1;
|
||||
final int sexPos = missingFields.contains(MissingPedField.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1;
|
||||
final int phenotypePos = missingFields.contains(MissingPedField.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1;
|
||||
final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)) + 1;
|
||||
|
||||
// go through once and determine properties
|
||||
|
|
|
|||
|
|
@ -26,15 +26,14 @@ package org.broadinstitute.sting.gatk.samples;
|
|||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.lang.reflect.Array;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* UnitTest for PedReader
|
||||
|
|
@ -48,7 +47,7 @@ public class PedReaderUnitTest extends BaseTest {
|
|||
private class PedReaderTest extends TestDataProvider {
|
||||
public String fileContents;
|
||||
public List<Sample> expectedSamples;
|
||||
EnumSet<PedReader.MissingPedFields> missing;
|
||||
EnumSet<PedReader.MissingPedField> missing;
|
||||
|
||||
private PedReaderTest(final String name, final List<Sample> expectedSamples, final String fileContents) {
|
||||
super(PedReaderTest.class, name);
|
||||
|
|
@ -57,19 +56,6 @@ public class PedReaderUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
private class PedReaderTestMissing extends TestDataProvider {
|
||||
public String fileContents;
|
||||
public List<Sample> expectedSamples;
|
||||
EnumSet<PedReader.MissingPedFields> missing;
|
||||
|
||||
private PedReaderTestMissing(final String name, EnumSet<PedReader.MissingPedFields> missing, final List<Sample> expectedSamples, final String fileContents) {
|
||||
super(PedReaderTest.class, name);
|
||||
this.fileContents = fileContents;
|
||||
this.expectedSamples = expectedSamples;
|
||||
this.missing = missing;
|
||||
}
|
||||
}
|
||||
|
||||
// Family ID
|
||||
// Individual ID
|
||||
// Paternal ID
|
||||
|
|
@ -115,17 +101,17 @@ public class PedReaderUnitTest extends BaseTest {
|
|||
|
||||
new PedReaderTest("multipleUnrelated",
|
||||
Arrays.asList(
|
||||
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
|
||||
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.AFFECTED)),
|
||||
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
|
||||
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.AFFECTED)),
|
||||
String.format("%s%n%s",
|
||||
"fam1 s1 0 0 1 1",
|
||||
"fam2 s2 0 0 2 2"));
|
||||
|
||||
new PedReaderTest("explicitTrio",
|
||||
Arrays.asList(
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
|
||||
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)),
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
|
||||
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)),
|
||||
String.format("%s%n%s%n%s",
|
||||
"fam1 kid dad mom 1 2",
|
||||
"fam1 dad 0 0 1 1",
|
||||
|
|
@ -133,29 +119,29 @@ public class PedReaderUnitTest extends BaseTest {
|
|||
|
||||
new PedReaderTest("implicitTrio",
|
||||
Arrays.asList(
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
|
||||
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
|
||||
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
|
||||
"fam1 kid dad mom 1 2");
|
||||
|
||||
new PedReaderTest("partialTrio",
|
||||
Arrays.asList(
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
|
||||
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
|
||||
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
|
||||
String.format("%s%n%s",
|
||||
"fam1 kid dad mom 1 2",
|
||||
"fam1 dad 0 0 1 1"));
|
||||
|
||||
new PedReaderTest("bigPedigree",
|
||||
Arrays.asList(
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("dad", "fam1", "granddad1", "grandma1", Gender.MALE, Affection.UNAFFECTED),
|
||||
new Sample("granddad1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
|
||||
new Sample("grandma1", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN),
|
||||
new Sample("mom", "fam1", "granddad2", "grandma2", Gender.FEMALE, Affection.AFFECTED),
|
||||
new Sample("granddad2", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
|
||||
new Sample("grandma2", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("dad", "fam1", "granddad1", "grandma1", Gender.MALE, Affection.UNAFFECTED),
|
||||
new Sample("granddad1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
|
||||
new Sample("grandma1", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN),
|
||||
new Sample("mom", "fam1", "granddad2", "grandma2", Gender.FEMALE, Affection.AFFECTED),
|
||||
new Sample("granddad2", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
|
||||
new Sample("grandma2", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
|
||||
String.format("%s%n%s%n%s",
|
||||
"fam1 kid dad mom 1 2",
|
||||
"fam1 dad granddad1 grandma1 1 1",
|
||||
|
|
@ -164,24 +150,24 @@ public class PedReaderUnitTest extends BaseTest {
|
|||
// Quantitative trait
|
||||
new PedReaderTest("QuantitativeTrait",
|
||||
Arrays.asList(
|
||||
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0),
|
||||
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
|
||||
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0),
|
||||
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
|
||||
String.format("%s%n%s",
|
||||
"fam1 s1 0 0 1 1",
|
||||
"fam2 s2 0 0 2 10.0"));
|
||||
|
||||
new PedReaderTest("QuantitativeTraitWithMissing",
|
||||
Arrays.asList(
|
||||
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN, Sample.UNSET_QT),
|
||||
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
|
||||
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN, Sample.UNSET_QT),
|
||||
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
|
||||
String.format("%s%n%s",
|
||||
"fam1 s1 0 0 1 -9",
|
||||
"fam2 s2 0 0 2 10.0"));
|
||||
|
||||
new PedReaderTest("QuantitativeTraitOnlyInts",
|
||||
Arrays.asList(
|
||||
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0),
|
||||
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
|
||||
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0),
|
||||
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
|
||||
String.format("%s%n%s",
|
||||
"fam1 s1 0 0 1 1",
|
||||
"fam2 s2 0 0 2 10"));
|
||||
|
|
@ -189,7 +175,7 @@ public class PedReaderUnitTest extends BaseTest {
|
|||
return PedReaderTest.getTests(PedReaderTest.class);
|
||||
}
|
||||
|
||||
private static final void runTest(PedReaderTest test, String myFileContents, EnumSet<PedReader.MissingPedFields> missing) {
|
||||
private static final void runTest(PedReaderTest test, String myFileContents, EnumSet<PedReader.MissingPedField> missing) {
|
||||
logger.warn("Test " + test);
|
||||
PedReader reader = new PedReader();
|
||||
SampleDataSource sampleDB = new SampleDataSource();
|
||||
|
|
@ -199,37 +185,91 @@ public class PedReaderUnitTest extends BaseTest {
|
|||
|
||||
@Test(enabled = true, dataProvider = "readerTest")
|
||||
public void testPedReader(PedReaderTest test) {
|
||||
runTest(test, test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class));
|
||||
runTest(test, test.fileContents, EnumSet.noneOf(PedReader.MissingPedField.class));
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader")
|
||||
public void testPedReaderWithComments(PedReaderTest test) {
|
||||
runTest(test, String.format("#comment%n%s", test.fileContents), EnumSet.noneOf(PedReader.MissingPedFields.class));
|
||||
runTest(test, String.format("#comment%n%s", test.fileContents), EnumSet.noneOf(PedReader.MissingPedField.class));
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// missing format field tests
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
private class PedReaderTestMissing extends TestDataProvider {
|
||||
public EnumSet<PedReader.MissingPedField> missingDesc;
|
||||
public EnumSet<PedReader.Field> missingFields;
|
||||
public final String fileContents;
|
||||
public Sample expected;
|
||||
|
||||
|
||||
private PedReaderTestMissing(final String name, final String fileContents,
|
||||
EnumSet<PedReader.MissingPedField> missingDesc,
|
||||
EnumSet<PedReader.Field> missingFields,
|
||||
final Sample expected) {
|
||||
super(PedReaderTestMissing.class, name);
|
||||
this.fileContents = fileContents;
|
||||
this.missingDesc = missingDesc;
|
||||
this.missingFields = missingFields;
|
||||
this.expected = expected;
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "readerTestMissing")
|
||||
public Object[][] createPEDFilesWithMissing() {
|
||||
new PedReaderTestMissing("trioMissingFam", EnumSet.of(PedReader.MissingPedFields.NO_FAMILY_ID),
|
||||
Arrays.asList(
|
||||
new Sample("kid", null, "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
new Sample("dad", null, null, null, Gender.MALE, Affection.UNAFFECTED),
|
||||
new Sample("mom", null, null, null, Gender.FEMALE, Affection.AFFECTED)),
|
||||
String.format("%s%n%s%n%s",
|
||||
"kid dad mom 1 2",
|
||||
"dad 0 0 1 1",
|
||||
"mom 0 0 2 2"));
|
||||
|
||||
new PedReaderTestMissing("missingFam",
|
||||
"fam1 kid dad mom 1 2",
|
||||
EnumSet.of(PedReader.MissingPedField.NO_FAMILY_ID),
|
||||
EnumSet.of(PedReader.Field.FAMILY_ID),
|
||||
new Sample("kid", null, "dad", "mom", Gender.MALE, Affection.AFFECTED));
|
||||
|
||||
new PedReaderTestMissing("missingParents",
|
||||
"fam1 kid dad mom 1 2",
|
||||
EnumSet.of(PedReader.MissingPedField.NO_PARENTS),
|
||||
EnumSet.of(PedReader.Field.PATERNAL_ID, PedReader.Field.MATERNAL_ID),
|
||||
new Sample("kid", "fam1", null, null, Gender.MALE, Affection.AFFECTED));
|
||||
|
||||
new PedReaderTestMissing("missingSex",
|
||||
"fam1 kid dad mom 1 2",
|
||||
EnumSet.of(PedReader.MissingPedField.NO_SEX),
|
||||
EnumSet.of(PedReader.Field.GENDER),
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.UNKNOWN, Affection.AFFECTED));
|
||||
|
||||
new PedReaderTestMissing("missingPhenotype",
|
||||
"fam1 kid dad mom 1 2",
|
||||
EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE),
|
||||
EnumSet.of(PedReader.Field.PHENOTYPE),
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.UNKNOWN));
|
||||
|
||||
new PedReaderTestMissing("missingEverythingButGender",
|
||||
"fam1 kid dad mom 1 2",
|
||||
EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE, PedReader.MissingPedField.NO_PARENTS, PedReader.MissingPedField.NO_FAMILY_ID),
|
||||
EnumSet.of(PedReader.Field.FAMILY_ID, PedReader.Field.PATERNAL_ID, PedReader.Field.MATERNAL_ID, PedReader.Field.PHENOTYPE),
|
||||
new Sample("kid", null, null, null, Gender.MALE, Affection.UNKNOWN));
|
||||
|
||||
|
||||
return PedReaderTestMissing.getTests(PedReaderTestMissing.class);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "readerTestMissing", dependsOnMethods = "testPedReader")
|
||||
public void testPedReaderWithMissing(PedReaderTest test) {
|
||||
// public enum MissingPedFields {
|
||||
// NO_FAMILY_ID,
|
||||
// NO_PARENTS,
|
||||
// NO_SEX,
|
||||
// NO_PHENOTYPE
|
||||
// }
|
||||
// runTest(test, sliceContents(0, test.fileContents), EnumSet.of(PedReader.MissingPedFields.NO_FAMILY_ID));
|
||||
public void testPedReaderWithMissing(PedReaderTestMissing test) {
|
||||
final String contents = sliceContents(test.missingFields, test.fileContents);
|
||||
logger.warn("Test " + test);
|
||||
PedReader reader = new PedReader();
|
||||
SampleDataSource sampleDB = new SampleDataSource();
|
||||
reader.parse(new StringReader(contents), test.missingDesc, sampleDB);
|
||||
final Sample missingSample = sampleDB.getSample("kid");
|
||||
Assert.assertEquals(test.expected, missingSample, "Missing field value not expected value for " + test);
|
||||
}
|
||||
|
||||
private final static String sliceContents(EnumSet<PedReader.Field> missingFieldsSet, String full) {
|
||||
List<String> parts = new ArrayList<String>(Arrays.asList(full.split("\\s+")));
|
||||
final List<PedReader.Field> missingFields = new ArrayList<PedReader.Field>(missingFieldsSet);
|
||||
Collections.reverse(missingFields);
|
||||
for ( PedReader.Field field : missingFields )
|
||||
parts.remove(field.ordinal());
|
||||
return Utils.join("\t", parts);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue