95% PedReader

-- Passes significant unit tests
-- Implicit sample creation for mom / dad when you create single samples
-- Continuing cleanup of Sample and SampleDataSource
This commit is contained in:
Mark DePristo 2011-09-30 18:03:34 -04:00
parent 84160bd83f
commit dd75ad9f49
4 changed files with 109 additions and 24 deletions

View File

@ -160,13 +160,13 @@ public class PedReader {
final List<String[]> splits = new ArrayList<String[]>(lines.size());
for ( final String line : lines ) {
if ( line.startsWith(commentMarker)) continue;
String[] parts = line.split("\\W+");
String[] parts = line.split("\\s+");
if ( parts.length != nExpectedFields )
throw new UserException.MalformedFile(reader.toString(), "Bad PED line " + lineNo + ": wrong number of fields");
if ( phenotypePos != -1 ) {
isQT = isQT || CATAGORICAL_TRAIT_VALUES.contains(parts[phenotypePos]);
isQT = isQT || ! CATAGORICAL_TRAIT_VALUES.contains(parts[phenotypePos]);
}
splits.add(parts);
@ -211,12 +211,21 @@ public class PedReader {
}
}
final Sample s = new Sample(familyID, sampleDB, individualID, paternalID, maternalID, sex, affection, quantitativePhenotype);
final Sample s = new Sample(individualID, sampleDB, familyID, paternalID, maternalID, sex, affection, quantitativePhenotype);
samples.add(s);
sampleDB.addSample(s);
lineNo++;
}
for ( final Sample sample : new ArrayList<Sample>(samples) ) {
Sample dad = maybeAddImplicitSample(sampleDB, sample.getPaternalID(), sample.getFamilyID(), Gender.MALE);
if ( dad != null ) samples.add(dad);
Sample mom = maybeAddImplicitSample(sampleDB, sample.getMaternalID(), sample.getFamilyID(), Gender.FEMALE);
if ( mom != null ) samples.add(mom);
}
sampleDB.validate(samples);
return samples;
}
@ -227,4 +236,13 @@ public class PedReader {
else
return string;
}
/**
 * Creates and registers a placeholder sample for an ID that is referenced
 * but not yet present in the sample database.
 *
 * @param sampleDB database that is queried for an existing sample and receives the new one
 * @param id       ID of the sample to create; may be null
 * @param familyID family ID assigned to the newly created sample
 * @param gender   gender assigned to the newly created sample
 * @return the newly created sample, or null when {@code id} is null or the
 *         database already returns a sample for {@code id}
 */
private final Sample maybeAddImplicitSample(SampleDataSource sampleDB, final String id, final String familyID, final Gender gender) {
    // Nothing to create without an ID, or when the sample is already known.
    if ( id == null || sampleDB.getSample(id) != null )
        return null;

    // The implicit sample has no parents, unknown affection and an unset quantitative trait.
    final Sample implicit = new Sample(id, sampleDB, familyID, null, null, gender, Affection.UNKNOWN, Sample.UNSET_QT);
    sampleDB.addSample(implicit);
    return implicit;
}
}

View File

@ -117,8 +117,11 @@ public class Sample implements java.io.Serializable {
return gender;
}
public String getFamilyId() {
return familyID;
/**
 * Renders the sample as a single line listing its ID, family ID, parental IDs,
 * gender, affection, quantitative phenotype and extra properties.
 */
@Override
public String toString() {
    final String template = "Sample %s fam=%s dad=%s mom=%s gender=%s affection=%s qt=%s props=%s";
    return String.format(
            template,
            getID(),
            getFamilyID(),
            getPaternalID(),
            getMaternalID(),
            getGender(),
            getAffection(),
            getQuantitativePhenotype(),
            getExtraProperties());
}
// -------------------------------------------------------------------------------------
@ -148,4 +151,34 @@ public class Sample implements java.io.Serializable {
/**
 * Reports whether an extra property is stored under the given key.
 *
 * @param key property name to look up
 * @return the result of {@code properties.containsKey(key)}
 */
public boolean hasExtraProperty(String key) {
    final boolean present = properties.containsKey(key);
    return present;
}
/**
 * Hash code derived solely from the sample ID.
 */
@Override
public int hashCode() {
    return this.ID.hashCode();
}
/**
 * Compares this sample with another object field by field.
 *
 * Two samples are equal when their IDs are equal and their family ID, paternal ID,
 * maternal ID, gender, quantitative phenotype, affection and properties are each
 * either equal or both null.
 *
 * @param o object to compare against; may be null
 * @return true only if {@code o} is a Sample whose fields all match
 */
@Override
public boolean equals(final Object o) {
    // instanceof is false for null, so this also rejects a null argument.
    if ( !(o instanceof Sample) )
        return false;

    final Sample that = (Sample) o;

    // Checks run in a fixed order and stop at the first mismatch.
    if ( !ID.equals(that.ID) )
        return false;
    if ( !equalOrNull(familyID, that.familyID) )
        return false;
    if ( !equalOrNull(paternalID, that.paternalID) )
        return false;
    if ( !equalOrNull(maternalID, that.maternalID) )
        return false;
    if ( !equalOrNull(gender, that.gender) )
        return false;
    if ( !equalOrNull(quantitativePhenotype, that.quantitativePhenotype) )
        return false;
    if ( !equalOrNull(affection, that.affection) )
        return false;
    return equalOrNull(properties, that.properties);
}
/**
 * Null-tolerant equality check.
 *
 * @param o1 first object; may be null
 * @param o2 second object; may be null
 * @return true when both arguments are null, false when exactly one is null,
 *         otherwise the result of {@code o1.equals(o2)}
 */
private final static boolean equalOrNull(final Object o1, final Object o2) {
    // With at least one null involved, the two are equal only if both are null.
    if ( o1 == null || o2 == null )
        return o1 == o2;
    return o1.equals(o2);
}
}

View File

@ -156,8 +156,8 @@ public class SampleDataSource {
HashSet<Sample> familyMembers = new HashSet<Sample>();
for (Sample sample : samples.values()) {
if (sample.getFamilyId() != null) {
if (sample.getFamilyId().equals(familyId))
if (sample.getFamilyID() != null) {
if (sample.getFamilyID().equals(familyId))
familyMembers.add(sample);
}
}
@ -172,7 +172,7 @@ public class SampleDataSource {
*/
public Set<Sample> getChildren(Sample sample) {
HashSet<Sample> children = new HashSet<Sample>();
for (Sample familyMember : getFamily(sample.getFamilyId())) {
for (Sample familyMember : getFamily(sample.getFamilyID())) {
if (familyMember.getMother() == sample || familyMember.getFather() == sample) {
children.add(familyMember);
}

View File

@ -33,6 +33,7 @@ import org.testng.annotations.Test;
import java.io.StringReader;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
/**
@ -47,6 +48,7 @@ public class PedReaderUnitTest extends BaseTest {
private class PedReaderTest extends TestDataProvider {
public String fileContents;
public List<Sample> expectedSamples;
EnumSet<PedReader.MissingPedFields> missing;
private PedReaderTest(final String name, final List<Sample> expectedSamples, final String fileContents) {
super(PedReaderTest.class, name);
@ -55,6 +57,19 @@ public class PedReaderUnitTest extends BaseTest {
}
}
/**
 * Test case describing PED file contents in which some fields are absent,
 * together with the samples expected from parsing them.
 */
private class PedReaderTestMissing extends TestDataProvider {
    /** Raw PED text handed to the reader. */
    public String fileContents;
    /** Samples the reader is expected to produce from {@link #fileContents}. */
    public List<Sample> expectedSamples;
    /** Fields that are absent from {@link #fileContents}. */
    EnumSet<PedReader.MissingPedFields> missing;

    /**
     * @param name            name of the test case
     * @param missing         fields that are absent from the file contents
     * @param expectedSamples samples expected after parsing
     * @param fileContents    PED text to parse
     */
    private PedReaderTestMissing(final String name, EnumSet<PedReader.MissingPedFields> missing, final List<Sample> expectedSamples, final String fileContents) {
        // Register under this class rather than PedReaderTest: the data provider
        // retrieves these cases with getTests(PedReaderTestMissing.class).
        super(PedReaderTestMissing.class, name);
        this.fileContents = fileContents;
        this.expectedSamples = expectedSamples;
        this.missing = missing;
    }
}
// Family ID
// Individual ID
// Paternal ID
@ -100,9 +115,9 @@ public class PedReaderUnitTest extends BaseTest {
new PedReaderTest("multipleUnrelated",
Arrays.asList(
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.AFFECTED),
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.UNAFFECTED)),
String.format("%s\n%s",
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.AFFECTED)),
String.format("%s%n%s",
"fam1 s1 0 0 1 1",
"fam2 s2 0 0 2 2"));
@ -111,7 +126,7 @@ public class PedReaderUnitTest extends BaseTest {
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)),
String.format("%s\n%s\n%s",
String.format("%s%n%s%n%s",
"fam1 kid dad mom 1 2",
"fam1 dad 0 0 1 1",
"fam1 mom 0 0 2 2"));
@ -121,14 +136,14 @@ public class PedReaderUnitTest extends BaseTest {
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
"fam1 kid dad mom 1 1");
"fam1 kid dad mom 1 2");
new PedReaderTest("partialTrio",
Arrays.asList(
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
String.format("%s\n%s",
String.format("%s%n%s",
"fam1 kid dad mom 1 2",
"fam1 dad 0 0 1 1"));
@ -141,7 +156,7 @@ public class PedReaderUnitTest extends BaseTest {
new Sample("mom", "fam1", "granddad2", "grandma2", Gender.FEMALE, Affection.AFFECTED),
new Sample("granddad2", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
new Sample("grandma2", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
String.format("%s\n%s\n%s",
String.format("%s%n%s%n%s",
"fam1 kid dad mom 1 2",
"fam1 dad granddad1 grandma1 1 1",
"fam1 mom granddad2 grandma2 2 2"));
@ -151,7 +166,7 @@ public class PedReaderUnitTest extends BaseTest {
Arrays.asList(
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0),
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
String.format("%s\n%s",
String.format("%s%n%s",
"fam1 s1 0 0 1 1",
"fam2 s2 0 0 2 10.0"));
@ -159,7 +174,7 @@ public class PedReaderUnitTest extends BaseTest {
Arrays.asList(
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN, Sample.UNSET_QT),
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
String.format("%s\n%s",
String.format("%s%n%s",
"fam1 s1 0 0 1 -9",
"fam2 s2 0 0 2 10.0"));
@ -167,7 +182,7 @@ public class PedReaderUnitTest extends BaseTest {
Arrays.asList(
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0),
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
String.format("%s\n%s",
String.format("%s%n%s",
"fam1 s1 0 0 1 1",
"fam2 s2 0 0 2 10"));
@ -179,7 +194,7 @@ public class PedReaderUnitTest extends BaseTest {
PedReader reader = new PedReader();
SampleDataSource sampleDB = new SampleDataSource();
List<Sample> readSamples = reader.parse(new StringReader(myFileContents), missing, sampleDB);
Assert.assertEquals(test.expectedSamples, readSamples, "Parsed incorrect number of samples");
Assert.assertEquals(new HashSet<Sample>(test.expectedSamples), new HashSet<Sample>(readSamples), "Parsed incorrect number of samples");
}
@Test(enabled = true, dataProvider = "readerTest")
@ -189,13 +204,32 @@ public class PedReaderUnitTest extends BaseTest {
@Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader")
public void testPedReaderWithComments(PedReaderTest test) {
runTest(test, "#comment\n" + test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class));
runTest(test, String.format("#comment%n%s", test.fileContents), EnumSet.noneOf(PedReader.MissingPedFields.class));
}
@Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader")
/**
 * Data provider "readerTestMissing": builds a trio whose PED lines have no
 * family ID column and returns the tests obtained from
 * {@code PedReaderTestMissing.getTests(PedReaderTestMissing.class)}.
 */
@DataProvider(name = "readerTestMissing")
public Object[][] createPEDFilesWithMissing() {
    final EnumSet<PedReader.MissingPedFields> noFamilyID = EnumSet.of(PedReader.MissingPedFields.NO_FAMILY_ID);

    // Expected samples carry a null family ID because the column is absent.
    final List<Sample> expectedTrio = Arrays.asList(
            new Sample("kid", null, "dad", "mom", Gender.MALE, Affection.AFFECTED),
            new Sample("dad", null, null, null, Gender.MALE, Affection.UNAFFECTED),
            new Sample("mom", null, null, null, Gender.FEMALE, Affection.AFFECTED));

    final String pedText = String.format("%s%n%s%n%s",
            "kid dad mom 1 2",
            "dad 0 0 1 1",
            "mom 0 0 2 2");

    // Constructed for its side effect only; the instance itself is not kept here.
    new PedReaderTestMissing("trioMissingFam", noFamilyID, expectedTrio, pedText);

    return PedReaderTestMissing.getTests(PedReaderTestMissing.class);
}
/**
 * Placeholder for testing PED input with missing fields; the body is not implemented yet.
 *
 * The parameter is typed as PedReaderTestMissing because that is the type the
 * "readerTestMissing" data provider constructs.
 *
 * @param test test case supplied by the "readerTestMissing" data provider
 */
@Test(enabled = true, dataProvider = "readerTestMissing", dependsOnMethods = "testPedReader")
public void testPedReaderWithMissing(PedReaderTestMissing test) {
    // todo -- test MISSING by splicing strings
    // Fields that may be missing (PedReader.MissingPedFields):
    //     NO_FAMILY_ID,
    //     NO_PARENTS,
    //     NO_SEX,
    //     NO_PHENOTYPE
    // Intended shape of the check once a slicing helper exists:
    //     runTest(test, sliceContents(0, test.fileContents), EnumSet.of(PedReader.MissingPedFields.NO_FAMILY_ID));
}
}