95% PedReader

-- Passes significant unit tests
-- Implicit sample creation for mom / dad when you create single samples
-- Continuing cleanup of Sample and SampleDataSource
This commit is contained in:
Mark DePristo 2011-09-30 18:03:34 -04:00
parent 84160bd83f
commit dd75ad9f49
4 changed files with 109 additions and 24 deletions

View File

@ -160,13 +160,13 @@ public class PedReader {
final List<String[]> splits = new ArrayList<String[]>(lines.size()); final List<String[]> splits = new ArrayList<String[]>(lines.size());
for ( final String line : lines ) { for ( final String line : lines ) {
if ( line.startsWith(commentMarker)) continue; if ( line.startsWith(commentMarker)) continue;
String[] parts = line.split("\\W+"); String[] parts = line.split("\\s+");
if ( parts.length != nExpectedFields ) if ( parts.length != nExpectedFields )
throw new UserException.MalformedFile(reader.toString(), "Bad PED line " + lineNo + ": wrong number of fields"); throw new UserException.MalformedFile(reader.toString(), "Bad PED line " + lineNo + ": wrong number of fields");
if ( phenotypePos != -1 ) { if ( phenotypePos != -1 ) {
isQT = isQT || CATAGORICAL_TRAIT_VALUES.contains(parts[phenotypePos]); isQT = isQT || ! CATAGORICAL_TRAIT_VALUES.contains(parts[phenotypePos]);
} }
splits.add(parts); splits.add(parts);
@ -211,12 +211,21 @@ public class PedReader {
} }
} }
final Sample s = new Sample(familyID, sampleDB, individualID, paternalID, maternalID, sex, affection, quantitativePhenotype); final Sample s = new Sample(individualID, sampleDB, familyID, paternalID, maternalID, sex, affection, quantitativePhenotype);
samples.add(s); samples.add(s);
sampleDB.addSample(s); sampleDB.addSample(s);
lineNo++; lineNo++;
} }
for ( final Sample sample : new ArrayList<Sample>(samples) ) {
Sample dad = maybeAddImplicitSample(sampleDB, sample.getPaternalID(), sample.getFamilyID(), Gender.MALE);
if ( dad != null ) samples.add(dad);
Sample mom = maybeAddImplicitSample(sampleDB, sample.getMaternalID(), sample.getFamilyID(), Gender.FEMALE);
if ( mom != null ) samples.add(mom);
}
sampleDB.validate(samples); sampleDB.validate(samples);
return samples; return samples;
} }
@ -227,4 +236,13 @@ public class PedReader {
else else
return string; return string;
} }
/**
 * Creates and registers a placeholder sample for an ID that is referenced
 * but not yet present in the sample database.
 *
 * The placeholder is built with no parents, the supplied gender,
 * Affection.UNKNOWN and Sample.UNSET_QT.
 *
 * @param sampleDB the database to look the ID up in and to add the new sample to
 * @param id       the sample ID to check; may be null, in which case nothing is created
 * @param familyID the family ID given to the newly created sample
 * @param gender   the gender given to the newly created sample
 * @return the newly created sample, or null if id is null or sampleDB already has a sample for it
 */
private final Sample maybeAddImplicitSample(SampleDataSource sampleDB, final String id, final String familyID, final Gender gender) {
    // Nothing to create when there is no ID or the database already knows it.
    if ( id == null || sampleDB.getSample(id) != null )
        return null;

    final Sample implicit = new Sample(id, sampleDB, familyID, null, null, gender, Affection.UNKNOWN, Sample.UNSET_QT);
    sampleDB.addSample(implicit);
    return implicit;
}
} }

View File

@ -117,8 +117,11 @@ public class Sample implements java.io.Serializable {
return gender; return gender;
} }
public String getFamilyId() { @Override
return familyID; public String toString() {
return String.format("Sample %s fam=%s dad=%s mom=%s gender=%s affection=%s qt=%s props=%s",
getID(), getFamilyID(), getPaternalID(), getMaternalID(), getGender(), getAffection(),
getQuantitativePhenotype(), getExtraProperties());
} }
// ------------------------------------------------------------------------------------- // -------------------------------------------------------------------------------------
@ -148,4 +151,34 @@ public class Sample implements java.io.Serializable {
public boolean hasExtraProperty(String key) { public boolean hasExtraProperty(String key) {
return properties.containsKey(key); return properties.containsKey(key);
} }
/**
 * Hashes a sample by its ID alone.
 *
 * equals() requires the IDs of two samples to match, so samples that are
 * equal always share a hash code.
 *
 * @return the hash code of this sample's ID
 */
@Override
public int hashCode() {
    final int idHash = ID.hashCode();
    return idHash;
}
/**
 * Compares this sample with another object field by field.
 *
 * Two samples are equal when their IDs are equal and their family ID,
 * paternal ID, maternal ID, gender, quantitative phenotype, affection and
 * extra properties are each either both null or equal.
 *
 * @param o the object to compare against; may be null
 * @return true if o is a Sample with matching fields, false otherwise
 */
@Override
public boolean equals(final Object o) {
    // instanceof is false for null, so this also rejects a null argument.
    if ( !(o instanceof Sample) )
        return false;

    final Sample that = (Sample) o;

    // ID is compared directly; every other field may legitimately be null.
    if ( !ID.equals(that.ID) )
        return false;

    return equalOrNull(familyID, that.familyID)
            && equalOrNull(paternalID, that.paternalID)
            && equalOrNull(maternalID, that.maternalID)
            && equalOrNull(gender, that.gender)
            && equalOrNull(quantitativePhenotype, that.quantitativePhenotype)
            && equalOrNull(affection, that.affection)
            && equalOrNull(properties, that.properties);
}
/**
 * Null-safe equality check.
 *
 * @param o1 first object, may be null
 * @param o2 second object, may be null
 * @return true if both are null, or both are non-null and o1.equals(o2); false otherwise
 */
private final static boolean equalOrNull(final Object o1, final Object o2) {
    // When either side is null the pair matches only if both are null.
    if ( o1 == null || o2 == null )
        return o1 == o2;
    return o1.equals(o2);
}
} }

View File

@ -156,8 +156,8 @@ public class SampleDataSource {
HashSet<Sample> familyMembers = new HashSet<Sample>(); HashSet<Sample> familyMembers = new HashSet<Sample>();
for (Sample sample : samples.values()) { for (Sample sample : samples.values()) {
if (sample.getFamilyId() != null) { if (sample.getFamilyID() != null) {
if (sample.getFamilyId().equals(familyId)) if (sample.getFamilyID().equals(familyId))
familyMembers.add(sample); familyMembers.add(sample);
} }
} }
@ -172,7 +172,7 @@ public class SampleDataSource {
*/ */
public Set<Sample> getChildren(Sample sample) { public Set<Sample> getChildren(Sample sample) {
HashSet<Sample> children = new HashSet<Sample>(); HashSet<Sample> children = new HashSet<Sample>();
for (Sample familyMember : getFamily(sample.getFamilyId())) { for (Sample familyMember : getFamily(sample.getFamilyID())) {
if (familyMember.getMother() == sample || familyMember.getFather() == sample) { if (familyMember.getMother() == sample || familyMember.getFather() == sample) {
children.add(familyMember); children.add(familyMember);
} }

View File

@ -33,6 +33,7 @@ import org.testng.annotations.Test;
import java.io.StringReader; import java.io.StringReader;
import java.util.Arrays; import java.util.Arrays;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.HashSet;
import java.util.List; import java.util.List;
/** /**
@ -47,6 +48,7 @@ public class PedReaderUnitTest extends BaseTest {
private class PedReaderTest extends TestDataProvider { private class PedReaderTest extends TestDataProvider {
public String fileContents; public String fileContents;
public List<Sample> expectedSamples; public List<Sample> expectedSamples;
EnumSet<PedReader.MissingPedFields> missing;
private PedReaderTest(final String name, final List<Sample> expectedSamples, final String fileContents) { private PedReaderTest(final String name, final List<Sample> expectedSamples, final String fileContents) {
super(PedReaderTest.class, name); super(PedReaderTest.class, name);
@ -55,6 +57,19 @@ public class PedReaderUnitTest extends BaseTest {
} }
} }
/**
 * Test case holder for PED inputs that are parsed with some of the standard
 * PED fields declared as missing.
 *
 * Carries the raw file contents, the samples the reader is expected to
 * produce, and the set of fields that are absent from the file.
 */
private class PedReaderTestMissing extends TestDataProvider {
    /** Raw PED text handed to the reader. */
    public String fileContents;
    /** Samples the reader is expected to produce from fileContents. */
    public List<Sample> expectedSamples;
    /** Fields that are absent from every line of fileContents. */
    EnumSet<PedReader.MissingPedFields> missing;

    /**
     * @param name            name of the test case
     * @param missing         the PED fields absent from fileContents
     * @param expectedSamples the samples expected after parsing
     * @param fileContents    the raw PED text to parse
     */
    private PedReaderTestMissing(final String name, EnumSet<PedReader.MissingPedFields> missing, final List<Sample> expectedSamples, final String fileContents) {
        // Register under this class rather than PedReaderTest: the data provider
        // fetches these cases with getTests(PedReaderTestMissing.class).
        // NOTE(review): assumes TestDataProvider groups cases by the class passed
        // here -- confirm against BaseTest.
        super(PedReaderTestMissing.class, name);
        this.fileContents = fileContents;
        this.expectedSamples = expectedSamples;
        this.missing = missing;
    }
}
// Family ID // Family ID
// Individual ID // Individual ID
// Paternal ID // Paternal ID
@ -100,9 +115,9 @@ public class PedReaderUnitTest extends BaseTest {
new PedReaderTest("multipleUnrelated", new PedReaderTest("multipleUnrelated",
Arrays.asList( Arrays.asList(
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.AFFECTED), new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.UNAFFECTED)), new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.AFFECTED)),
String.format("%s\n%s", String.format("%s%n%s",
"fam1 s1 0 0 1 1", "fam1 s1 0 0 1 1",
"fam2 s2 0 0 2 2")); "fam2 s2 0 0 2 2"));
@ -111,7 +126,7 @@ public class PedReaderUnitTest extends BaseTest {
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED), new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED), new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)), new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)),
String.format("%s\n%s\n%s", String.format("%s%n%s%n%s",
"fam1 kid dad mom 1 2", "fam1 kid dad mom 1 2",
"fam1 dad 0 0 1 1", "fam1 dad 0 0 1 1",
"fam1 mom 0 0 2 2")); "fam1 mom 0 0 2 2"));
@ -121,14 +136,14 @@ public class PedReaderUnitTest extends BaseTest {
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED), new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNKNOWN), new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)), new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
"fam1 kid dad mom 1 1"); "fam1 kid dad mom 1 2");
new PedReaderTest("partialTrio", new PedReaderTest("partialTrio",
Arrays.asList( Arrays.asList(
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED), new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED), new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)), new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
String.format("%s\n%s", String.format("%s%n%s",
"fam1 kid dad mom 1 2", "fam1 kid dad mom 1 2",
"fam1 dad 0 0 1 1")); "fam1 dad 0 0 1 1"));
@ -141,7 +156,7 @@ public class PedReaderUnitTest extends BaseTest {
new Sample("mom", "fam1", "granddad2", "grandma2", Gender.FEMALE, Affection.AFFECTED), new Sample("mom", "fam1", "granddad2", "grandma2", Gender.FEMALE, Affection.AFFECTED),
new Sample("granddad2", "fam1", null, null, Gender.MALE, Affection.UNKNOWN), new Sample("granddad2", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
new Sample("grandma2", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)), new Sample("grandma2", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
String.format("%s\n%s\n%s", String.format("%s%n%s%n%s",
"fam1 kid dad mom 1 2", "fam1 kid dad mom 1 2",
"fam1 dad granddad1 grandma1 1 1", "fam1 dad granddad1 grandma1 1 1",
"fam1 mom granddad2 grandma2 2 2")); "fam1 mom granddad2 grandma2 2 2"));
@ -151,7 +166,7 @@ public class PedReaderUnitTest extends BaseTest {
Arrays.asList( Arrays.asList(
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0), new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0),
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)), new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
String.format("%s\n%s", String.format("%s%n%s",
"fam1 s1 0 0 1 1", "fam1 s1 0 0 1 1",
"fam2 s2 0 0 2 10.0")); "fam2 s2 0 0 2 10.0"));
@ -159,7 +174,7 @@ public class PedReaderUnitTest extends BaseTest {
Arrays.asList( Arrays.asList(
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN, Sample.UNSET_QT), new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN, Sample.UNSET_QT),
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)), new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
String.format("%s\n%s", String.format("%s%n%s",
"fam1 s1 0 0 1 -9", "fam1 s1 0 0 1 -9",
"fam2 s2 0 0 2 10.0")); "fam2 s2 0 0 2 10.0"));
@ -167,7 +182,7 @@ public class PedReaderUnitTest extends BaseTest {
Arrays.asList( Arrays.asList(
new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0), new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0),
new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)), new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
String.format("%s\n%s", String.format("%s%n%s",
"fam1 s1 0 0 1 1", "fam1 s1 0 0 1 1",
"fam2 s2 0 0 2 10")); "fam2 s2 0 0 2 10"));
@ -179,7 +194,7 @@ public class PedReaderUnitTest extends BaseTest {
PedReader reader = new PedReader(); PedReader reader = new PedReader();
SampleDataSource sampleDB = new SampleDataSource(); SampleDataSource sampleDB = new SampleDataSource();
List<Sample> readSamples = reader.parse(new StringReader(myFileContents), missing, sampleDB); List<Sample> readSamples = reader.parse(new StringReader(myFileContents), missing, sampleDB);
Assert.assertEquals(test.expectedSamples, readSamples, "Parsed incorrect number of samples"); Assert.assertEquals(new HashSet<Sample>(test.expectedSamples), new HashSet<Sample>(readSamples), "Parsed incorrect number of samples");
} }
@Test(enabled = true, dataProvider = "readerTest") @Test(enabled = true, dataProvider = "readerTest")
@ -189,13 +204,32 @@ public class PedReaderUnitTest extends BaseTest {
@Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader") @Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader")
public void testPedReaderWithComments(PedReaderTest test) { public void testPedReaderWithComments(PedReaderTest test) {
runTest(test, "#comment\n" + test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class)); runTest(test, String.format("#comment%n%s", test.fileContents), EnumSet.noneOf(PedReader.MissingPedFields.class));
} }
@Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader") @DataProvider(name = "readerTestMissing")
public Object[][] createPEDFilesWithMissing() {
    // A father/mother/child trio whose PED lines have no leading family ID
    // column, so every expected sample carries a null family ID.
    final EnumSet<PedReader.MissingPedFields> noFamilyID = EnumSet.of(PedReader.MissingPedFields.NO_FAMILY_ID);

    final List<Sample> expectedTrio = Arrays.asList(
            new Sample("kid", null, "dad", "mom", Gender.MALE, Affection.AFFECTED),
            new Sample("dad", null, null, null, Gender.MALE, Affection.UNAFFECTED),
            new Sample("mom", null, null, null, Gender.FEMALE, Affection.AFFECTED));

    final String pedText = String.format("%s%n%s%n%s",
            "kid dad mom 1 2",
            "dad 0 0 1 1",
            "mom 0 0 2 2");

    // The instance is deliberately discarded; presumably the TestDataProvider
    // constructor records it so getTests() can return it -- TODO confirm.
    new PedReaderTestMissing("trioMissingFam", noFamilyID, expectedTrio, pedText);

    return PedReaderTestMissing.getTests(PedReaderTestMissing.class);
}
@Test(enabled = true, dataProvider = "readerTestMissing", dependsOnMethods = "testPedReader")
public void testPedReaderWithMissing(PedReaderTest test) { public void testPedReaderWithMissing(PedReaderTest test) {
// todo -- test MISSING by splicing strings // public enum MissingPedFields {
//runTest(test, "#comment\n" + test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class)); // NO_FAMILY_ID,
// NO_PARENTS,
// NO_SEX,
// NO_PHENOTYPE
// }
// runTest(test, sliceContents(0, test.fileContents), EnumSet.of(PedReader.MissingPedFields.NO_FAMILY_ID));
} }
} }