From 52f670c8b86787d9e72c5e179fe82d37c5e4729f Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 3 Oct 2011 06:12:58 -0700 Subject: [PATCH] 100% version of PedReader -- Passes all unit tests -- Added unit tests for missing fields --- .../sting/gatk/samples/PedReader.java | 20 ++- .../sting/gatk/samples/PedReaderUnitTest.java | 164 +++++++++++------- 2 files changed, 114 insertions(+), 70 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/PedReader.java b/public/java/src/org/broadinstitute/sting/gatk/samples/PedReader.java index e581c3718..27b9181de 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/samples/PedReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/PedReader.java @@ -117,13 +117,17 @@ public class PedReader { final static private Set CATAGORICAL_TRAIT_VALUES = new HashSet(Arrays.asList("-9", "0", "1", "2")); final static private String commentMarker = "#"; - public enum MissingPedFields { + public enum MissingPedField { NO_FAMILY_ID, NO_PARENTS, NO_SEX, NO_PHENOTYPE } + protected enum Field { + FAMILY_ID, INDIVIDUAL_ID, PATERNAL_ID, MATERNAL_ID, GENDER, PHENOTYPE + } + // phenotype private final static String MISSING_VALUE1 = "-9"; private final static String MISSING_VALUE2 = "0"; @@ -137,21 +141,21 @@ public class PedReader { public PedReader() { } - public final List parse(File source, EnumSet missingFields, SampleDataSource sampleDB) throws FileNotFoundException { + public final List parse(File source, EnumSet missingFields, SampleDataSource sampleDB) throws FileNotFoundException { logger.info("Reading PED file " + source + " with missing fields: " + missingFields); return parse(new FileReader(source), missingFields, sampleDB); } - public final List parse(Reader reader, EnumSet missingFields, SampleDataSource sampleDB) { + public final List parse(Reader reader, EnumSet missingFields, SampleDataSource sampleDB) { final List lines = new XReadLines(reader).readLines(); // What are the record offsets? - final int familyPos = missingFields.contains(MissingPedFields.NO_FAMILY_ID) ? -1 : 0; + final int familyPos = missingFields.contains(MissingPedField.NO_FAMILY_ID) ? -1 : 0; final int samplePos = familyPos + 1; - final int paternalPos = missingFields.contains(MissingPedFields.NO_PARENTS) ? -1 : samplePos + 1; - final int maternalPos = missingFields.contains(MissingPedFields.NO_PARENTS) ? -1 : paternalPos + 1; - final int sexPos = missingFields.contains(MissingPedFields.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1; - final int phenotypePos = missingFields.contains(MissingPedFields.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1; + final int paternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : samplePos + 1; + final int maternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : paternalPos + 1; + final int sexPos = missingFields.contains(MissingPedField.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1; + final int phenotypePos = missingFields.contains(MissingPedField.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1; final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)) + 1; // go through once and determine properties diff --git a/public/java/test/org/broadinstitute/sting/gatk/samples/PedReaderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/samples/PedReaderUnitTest.java index 5eec0e8c8..35be45bc7 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/samples/PedReaderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/samples/PedReaderUnitTest.java @@ -26,15 +26,14 @@ package org.broadinstitute.sting.gatk.samples; import org.apache.log4j.Logger; import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.Utils; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.StringReader; -import java.util.Arrays; -import java.util.EnumSet; -import java.util.HashSet; -import java.util.List; +import java.lang.reflect.Array; +import java.util.*; /** * UnitTest for PedReader @@ -48,7 +47,7 @@ public class PedReaderUnitTest extends BaseTest { private class PedReaderTest extends TestDataProvider { public String fileContents; public List expectedSamples; - EnumSet missing; + EnumSet missing; private PedReaderTest(final String name, final List expectedSamples, final String fileContents) { super(PedReaderTest.class, name); @@ -57,19 +56,6 @@ public class PedReaderUnitTest extends BaseTest { } } - private class PedReaderTestMissing extends TestDataProvider { - public String fileContents; - public List expectedSamples; - EnumSet missing; - - private PedReaderTestMissing(final String name, EnumSet missing, final List expectedSamples, final String fileContents) { - super(PedReaderTest.class, name); - this.fileContents = fileContents; - this.expectedSamples = expectedSamples; - this.missing = missing; - } - } - // Family ID // Individual ID // Paternal ID @@ -115,17 +101,17 @@ public class PedReaderUnitTest extends BaseTest { new PedReaderTest("multipleUnrelated", Arrays.asList( - new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED), - new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.AFFECTED)), + new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED), + new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.AFFECTED)), String.format("%s%n%s", "fam1 s1 0 0 1 1", "fam2 s2 0 0 2 2")); new PedReaderTest("explicitTrio", Arrays.asList( - new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED), - new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED), - new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)), + new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED), + new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED), + new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)), String.format("%s%n%s%n%s", "fam1 kid dad mom 1 2", "fam1 dad 0 0 1 1", @@ -133,29 +119,29 @@ public class PedReaderUnitTest extends BaseTest { new PedReaderTest("implicitTrio", Arrays.asList( - new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED), - new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNKNOWN), - new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)), + new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED), + new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNKNOWN), + new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)), "fam1 kid dad mom 1 2"); new PedReaderTest("partialTrio", Arrays.asList( - new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED), - new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED), - new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)), + new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED), + new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED), + new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)), String.format("%s%n%s", "fam1 kid dad mom 1 2", "fam1 dad 0 0 1 1")); new PedReaderTest("bigPedigree", Arrays.asList( - new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED), - new Sample("dad", "fam1", "granddad1", "grandma1", Gender.MALE, Affection.UNAFFECTED), - new Sample("granddad1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN), - new Sample("grandma1", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN), - new Sample("mom", "fam1", "granddad2", "grandma2", Gender.FEMALE, Affection.AFFECTED), - new Sample("granddad2", "fam1", null, null, Gender.MALE, Affection.UNKNOWN), - new Sample("grandma2", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)), + new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED), + new Sample("dad", "fam1", "granddad1", "grandma1", Gender.MALE, Affection.UNAFFECTED), + new Sample("granddad1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN), + new Sample("grandma1", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN), + new Sample("mom", "fam1", "granddad2", "grandma2", Gender.FEMALE, Affection.AFFECTED), + new Sample("granddad2", "fam1", null, null, Gender.MALE, Affection.UNKNOWN), + new Sample("grandma2", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)), String.format("%s%n%s%n%s", "fam1 kid dad mom 1 2", "fam1 dad granddad1 grandma1 1 1", @@ -164,24 +150,24 @@ public class PedReaderUnitTest extends BaseTest { // Quantitative trait new PedReaderTest("QuantitativeTrait", Arrays.asList( - new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0), - new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)), + new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0), + new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)), String.format("%s%n%s", "fam1 s1 0 0 1 1", "fam2 s2 0 0 2 10.0")); new PedReaderTest("QuantitativeTraitWithMissing", Arrays.asList( - new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN, Sample.UNSET_QT), - new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)), + new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN, Sample.UNSET_QT), + new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)), String.format("%s%n%s", "fam1 s1 0 0 1 -9", "fam2 s2 0 0 2 10.0")); new PedReaderTest("QuantitativeTraitOnlyInts", Arrays.asList( - new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0), - new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)), + new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0), + new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)), String.format("%s%n%s", "fam1 s1 0 0 1 1", "fam2 s2 0 0 2 10")); @@ -189,7 +175,7 @@ public class PedReaderUnitTest extends BaseTest { return PedReaderTest.getTests(PedReaderTest.class); } - private static final void runTest(PedReaderTest test, String myFileContents, EnumSet missing) { + private static final void runTest(PedReaderTest test, String myFileContents, EnumSet missing) { logger.warn("Test " + test); PedReader reader = new PedReader(); SampleDataSource sampleDB = new SampleDataSource(); @@ -199,37 +185,91 @@ public class PedReaderUnitTest extends BaseTest { @Test(enabled = true, dataProvider = "readerTest") public void testPedReader(PedReaderTest test) { - runTest(test, test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class)); + runTest(test, test.fileContents, EnumSet.noneOf(PedReader.MissingPedField.class)); } @Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader") public void testPedReaderWithComments(PedReaderTest test) { - runTest(test, String.format("#comment%n%s", test.fileContents), EnumSet.noneOf(PedReader.MissingPedFields.class)); + runTest(test, String.format("#comment%n%s", test.fileContents), EnumSet.noneOf(PedReader.MissingPedField.class)); + } + + // ----------------------------------------------------------------- + // missing format field tests + // ----------------------------------------------------------------- + + private class PedReaderTestMissing extends TestDataProvider { + public EnumSet missingDesc; + public EnumSet missingFields; + public final String fileContents; + public Sample expected; + + + private PedReaderTestMissing(final String name, final String fileContents, + EnumSet missingDesc, + EnumSet missingFields, + final Sample expected) { + super(PedReaderTestMissing.class, name); + this.fileContents = fileContents; + this.missingDesc = missingDesc; + this.missingFields = missingFields; + this.expected = expected; + } } @DataProvider(name = "readerTestMissing") public Object[][] createPEDFilesWithMissing() { - new PedReaderTestMissing("trioMissingFam", EnumSet.of(PedReader.MissingPedFields.NO_FAMILY_ID), - Arrays.asList( - new Sample("kid", null, "dad", "mom", Gender.MALE, Affection.AFFECTED), - new Sample("dad", null, null, null, Gender.MALE, Affection.UNAFFECTED), - new Sample("mom", null, null, null, Gender.FEMALE, Affection.AFFECTED)), - String.format("%s%n%s%n%s", - "kid dad mom 1 2", - "dad 0 0 1 1", - "mom 0 0 2 2")); + + new PedReaderTestMissing("missingFam", + "fam1 kid dad mom 1 2", + EnumSet.of(PedReader.MissingPedField.NO_FAMILY_ID), + EnumSet.of(PedReader.Field.FAMILY_ID), + new Sample("kid", null, "dad", "mom", Gender.MALE, Affection.AFFECTED)); + + new PedReaderTestMissing("missingParents", + "fam1 kid dad mom 1 2", + EnumSet.of(PedReader.MissingPedField.NO_PARENTS), + EnumSet.of(PedReader.Field.PATERNAL_ID, PedReader.Field.MATERNAL_ID), + new Sample("kid", "fam1", null, null, Gender.MALE, Affection.AFFECTED)); + + new PedReaderTestMissing("missingSex", + "fam1 kid dad mom 1 2", + EnumSet.of(PedReader.MissingPedField.NO_SEX), + EnumSet.of(PedReader.Field.GENDER), + new Sample("kid", "fam1", "dad", "mom", Gender.UNKNOWN, Affection.AFFECTED)); + + new PedReaderTestMissing("missingPhenotype", + "fam1 kid dad mom 1 2", + EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE), + EnumSet.of(PedReader.Field.PHENOTYPE), + new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.UNKNOWN)); + + new PedReaderTestMissing("missingEverythingButGender", + "fam1 kid dad mom 1 2", + EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE, PedReader.MissingPedField.NO_PARENTS, PedReader.MissingPedField.NO_FAMILY_ID), + EnumSet.of(PedReader.Field.FAMILY_ID, PedReader.Field.PATERNAL_ID, PedReader.Field.MATERNAL_ID, PedReader.Field.PHENOTYPE), + new Sample("kid", null, null, null, Gender.MALE, Affection.UNKNOWN)); + return PedReaderTestMissing.getTests(PedReaderTestMissing.class); } @Test(enabled = true, dataProvider = "readerTestMissing", dependsOnMethods = "testPedReader") - public void testPedReaderWithMissing(PedReaderTest test) { -// public enum MissingPedFields { -// NO_FAMILY_ID, -// NO_PARENTS, -// NO_SEX, -// NO_PHENOTYPE -// } -// runTest(test, sliceContents(0, test.fileContents), EnumSet.of(PedReader.MissingPedFields.NO_FAMILY_ID)); + public void testPedReaderWithMissing(PedReaderTestMissing test) { + final String contents = sliceContents(test.missingFields, test.fileContents); + logger.warn("Test " + test); + PedReader reader = new PedReader(); + SampleDataSource sampleDB = new SampleDataSource(); + reader.parse(new StringReader(contents), test.missingDesc, sampleDB); + final Sample missingSample = sampleDB.getSample("kid"); + Assert.assertEquals(test.expected, missingSample, "Missing field value not expected value for " + test); + } + + private final static String sliceContents(EnumSet missingFieldsSet, String full) { + List parts = new ArrayList(Arrays.asList(full.split("\\s+"))); + final List missingFields = new ArrayList(missingFieldsSet); + Collections.reverse(missingFields); + for ( PedReader.Field field : missingFields ) + parts.remove(field.ordinal()); + return Utils.join("\t", parts); } } \ No newline at end of file