diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/PedReader.java b/public/java/src/org/broadinstitute/sting/gatk/samples/PedReader.java index added09b6..e581c3718 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/samples/PedReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/PedReader.java @@ -160,13 +160,13 @@ public class PedReader { final List splits = new ArrayList(lines.size()); for ( final String line : lines ) { if ( line.startsWith(commentMarker)) continue; - String[] parts = line.split("\\W+"); + String[] parts = line.split("\\s+"); if ( parts.length != nExpectedFields ) throw new UserException.MalformedFile(reader.toString(), "Bad PED line " + lineNo + ": wrong number of fields"); if ( phenotypePos != -1 ) { - isQT = isQT || CATAGORICAL_TRAIT_VALUES.contains(parts[phenotypePos]); + isQT = isQT || ! CATAGORICAL_TRAIT_VALUES.contains(parts[phenotypePos]); } splits.add(parts); @@ -211,12 +211,21 @@ public class PedReader { } } - final Sample s = new Sample(familyID, sampleDB, individualID, paternalID, maternalID, sex, affection, quantitativePhenotype); + final Sample s = new Sample(individualID, sampleDB, familyID, paternalID, maternalID, sex, affection, quantitativePhenotype); samples.add(s); sampleDB.addSample(s); lineNo++; } + for ( final Sample sample : new ArrayList(samples) ) { + Sample dad = maybeAddImplicitSample(sampleDB, sample.getPaternalID(), sample.getFamilyID(), Gender.MALE); + if ( dad != null ) samples.add(dad); + + Sample mom = maybeAddImplicitSample(sampleDB, sample.getMaternalID(), sample.getFamilyID(), Gender.FEMALE); + if ( mom != null ) samples.add(mom); + } + + sampleDB.validate(samples); return samples; } @@ -227,4 +236,13 @@ public class PedReader { else return string; } + + private final Sample maybeAddImplicitSample(SampleDataSource sampleDB, final String id, final String familyID, final Gender gender) { + if ( id != null && sampleDB.getSample(id) == null ) { + Sample s = new Sample(id, sampleDB, familyID, null, null, gender, Affection.UNKNOWN, Sample.UNSET_QT); + sampleDB.addSample(s); + return s; + } else + return null; + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/Sample.java b/public/java/src/org/broadinstitute/sting/gatk/samples/Sample.java index 3426cf678..0a5043013 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/samples/Sample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/Sample.java @@ -117,8 +117,11 @@ public class Sample implements java.io.Serializable { return gender; } - public String getFamilyId() { - return familyID; + @Override + public String toString() { + return String.format("Sample %s fam=%s dad=%s mom=%s gender=%s affection=%s qt=%s props=%s", + getID(), getFamilyID(), getPaternalID(), getMaternalID(), getGender(), getAffection(), + getQuantitativePhenotype(), getExtraProperties()); } // ------------------------------------------------------------------------------------- @@ -148,4 +151,34 @@ public class Sample implements java.io.Serializable { public boolean hasExtraProperty(String key) { return properties.containsKey(key); } + + @Override + public int hashCode() { + return ID.hashCode(); + } + + @Override + public boolean equals(final Object o) { + if(o == null) + return false; + if(o instanceof Sample) { + Sample otherSample = (Sample)o; + return ID.equals(otherSample.ID) && + equalOrNull(familyID, otherSample.familyID) && + equalOrNull(paternalID, otherSample.paternalID) && + equalOrNull(maternalID, otherSample.maternalID) && + equalOrNull(gender, otherSample.gender) && + equalOrNull(quantitativePhenotype, otherSample.quantitativePhenotype) && + equalOrNull(affection, otherSample.affection) && + equalOrNull(properties, otherSample.properties); + } + return false; + } + + private final static boolean equalOrNull(final Object o1, final Object o2) { + if ( o1 == null ) + return o2 == null; + else + return o2 == null ? false : o1.equals(o2); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDataSource.java index e0d159947..b85759de2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDataSource.java @@ -156,8 +156,8 @@ public class SampleDataSource { HashSet familyMembers = new HashSet(); for (Sample sample : samples.values()) { - if (sample.getFamilyId() != null) { - if (sample.getFamilyId().equals(familyId)) + if (sample.getFamilyID() != null) { + if (sample.getFamilyID().equals(familyId)) familyMembers.add(sample); } } @@ -172,7 +172,7 @@ public class SampleDataSource { */ public Set getChildren(Sample sample) { HashSet children = new HashSet(); - for (Sample familyMember : getFamily(sample.getFamilyId())) { + for (Sample familyMember : getFamily(sample.getFamilyID())) { if (familyMember.getMother() == sample || familyMember.getFather() == sample) { children.add(familyMember); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/samples/PedReaderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/samples/PedReaderUnitTest.java index 1cad634dd..5eec0e8c8 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/samples/PedReaderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/samples/PedReaderUnitTest.java @@ -33,6 +33,7 @@ import org.testng.annotations.Test; import java.io.StringReader; import java.util.Arrays; import java.util.EnumSet; +import java.util.HashSet; import java.util.List; /** @@ -47,6 +48,7 @@ public class PedReaderUnitTest extends BaseTest { private class PedReaderTest extends TestDataProvider { public String fileContents; public List expectedSamples; + EnumSet missing; private PedReaderTest(final String name, final List expectedSamples, final String fileContents) { super(PedReaderTest.class, name); @@ -55,6 +57,19 @@ public class PedReaderUnitTest extends BaseTest { } } + private class PedReaderTestMissing extends TestDataProvider { + public String fileContents; + public List expectedSamples; + EnumSet missing; + + private PedReaderTestMissing(final String name, EnumSet missing, final List expectedSamples, final String fileContents) { + super(PedReaderTest.class, name); + this.fileContents = fileContents; + this.expectedSamples = expectedSamples; + this.missing = missing; + } + } + // Family ID // Individual ID // Paternal ID @@ -100,9 +115,9 @@ public class PedReaderUnitTest extends BaseTest { new PedReaderTest("multipleUnrelated", Arrays.asList( - new Sample("s1", "fam1", null, null, Gender.MALE, Affection.AFFECTED), - new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.UNAFFECTED)), - String.format("%s\n%s", + new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED), + new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.AFFECTED)), + String.format("%s%n%s", "fam1 s1 0 0 1 1", "fam2 s2 0 0 2 2")); @@ -111,7 +126,7 @@ public class PedReaderUnitTest extends BaseTest { new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED), new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED), new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)), - String.format("%s\n%s\n%s", + String.format("%s%n%s%n%s", "fam1 kid dad mom 1 2", "fam1 dad 0 0 1 1", "fam1 mom 0 0 2 2")); @@ -121,14 +136,14 @@ public class PedReaderUnitTest extends BaseTest { new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED), new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNKNOWN), new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)), - "fam1 kid dad mom 1 1"); + "fam1 kid dad mom 1 2"); new PedReaderTest("partialTrio", Arrays.asList( new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED), new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED), new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)), - String.format("%s\n%s", + String.format("%s%n%s", "fam1 kid dad mom 1 2", "fam1 dad 0 0 1 1")); @@ -141,7 +156,7 @@ public class PedReaderUnitTest extends BaseTest { new Sample("mom", "fam1", "granddad2", "grandma2", Gender.FEMALE, Affection.AFFECTED), new Sample("granddad2", "fam1", null, null, Gender.MALE, Affection.UNKNOWN), new Sample("grandma2", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)), - String.format("%s\n%s\n%s", + String.format("%s%n%s%n%s", "fam1 kid dad mom 1 2", "fam1 dad granddad1 grandma1 1 1", "fam1 mom granddad2 grandma2 2 2")); @@ -151,7 +166,7 @@ public class PedReaderUnitTest extends BaseTest { Arrays.asList( new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0), new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)), - String.format("%s\n%s", + String.format("%s%n%s", "fam1 s1 0 0 1 1", "fam2 s2 0 0 2 10.0")); @@ -159,7 +174,7 @@ public class PedReaderUnitTest extends BaseTest { Arrays.asList( new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN, Sample.UNSET_QT), new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)), - String.format("%s\n%s", + String.format("%s%n%s", "fam1 s1 0 0 1 -9", "fam2 s2 0 0 2 10.0")); @@ -167,7 +182,7 @@ public class PedReaderUnitTest extends BaseTest { Arrays.asList( new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0), new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)), - String.format("%s\n%s", + String.format("%s%n%s", "fam1 s1 0 0 1 1", "fam2 s2 0 0 2 10")); @@ -179,7 +194,7 @@ public class PedReaderUnitTest extends BaseTest { PedReader reader = new PedReader(); SampleDataSource sampleDB = new SampleDataSource(); List readSamples = reader.parse(new StringReader(myFileContents), missing, sampleDB); - Assert.assertEquals(test.expectedSamples, readSamples, "Parsed incorrect number of samples"); + Assert.assertEquals(new HashSet(test.expectedSamples), new HashSet(readSamples), "Parsed incorrect number of samples"); } @Test(enabled = true, dataProvider = "readerTest") @@ -189,13 +204,32 @@ public class PedReaderUnitTest extends BaseTest { @Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader") public void testPedReaderWithComments(PedReaderTest test) { - runTest(test, "#comment\n" + test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class)); + runTest(test, String.format("#comment%n%s", test.fileContents), EnumSet.noneOf(PedReader.MissingPedFields.class)); } - @Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader") + @DataProvider(name = "readerTestMissing") + public Object[][] createPEDFilesWithMissing() { + new PedReaderTestMissing("trioMissingFam", EnumSet.of(PedReader.MissingPedFields.NO_FAMILY_ID), + Arrays.asList( + new Sample("kid", null, "dad", "mom", Gender.MALE, Affection.AFFECTED), + new Sample("dad", null, null, null, Gender.MALE, Affection.UNAFFECTED), + new Sample("mom", null, null, null, Gender.FEMALE, Affection.AFFECTED)), + String.format("%s%n%s%n%s", + "kid dad mom 1 2", + "dad 0 0 1 1", + "mom 0 0 2 2")); + + return PedReaderTestMissing.getTests(PedReaderTestMissing.class); + } + + @Test(enabled = true, dataProvider = "readerTestMissing", dependsOnMethods = "testPedReader") public void testPedReaderWithMissing(PedReaderTest test) { - // todo -- test MISSING by splicing strings - //runTest(test, "#comment\n" + test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class)); +// public enum MissingPedFields { +// NO_FAMILY_ID, +// NO_PARENTS, +// NO_SEX, +// NO_PHENOTYPE +// } +// runTest(test, sliceContents(0, test.fileContents), EnumSet.of(PedReader.MissingPedFields.NO_FAMILY_ID)); } - } \ No newline at end of file