Add family lookups to SampleDB and make Sample comparable.

Sample now implements Comparable<Sample> and orders by sample ID.
SampleDB.getFamilies() groups every sample that has a non-null family ID into a
TreeMap of TreeSets, getFamilyIDs() returns that map's key set, and getFamily()
is rewritten on top of getFamilies().

Review notes:
 - Generic type arguments (<Sample>, <String>, ...) had been stripped from this
   copy of the patch and are restored.
 - getFamily() keeps returning an empty set for a null or unknown family ID, as
   the removed loop did. A bare getFamilies().get(familyId) throws
   NullPointerException for a null ID (TreeMap with natural ordering) and returns
   null for an unknown ID, and getChildren() iterates over the result directly.
 - Leading whitespace was reconstructed (4-space indent assumed); verify it or
   apply with --ignore-whitespace. The post-image hash on the SampleDB.java
   index line predates the getFamily()/Javadoc amendments.

diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/Sample.java b/public/java/src/org/broadinstitute/sting/gatk/samples/Sample.java
index d57668715..8d19eb246 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/samples/Sample.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/samples/Sample.java
@@ -9,7 +9,7 @@ import java.util.Map;
 /**
  *
  */
-public class Sample implements java.io.Serializable {
+public class Sample implements Comparable<Sample> { // implements java.io.Serializable {
     final private String familyID, paternalID, maternalID;
     final private Gender gender;
     final private double quantitativePhenotype;
@@ -118,6 +118,11 @@ public class Sample implements java.io.Serializable {
         return gender;
     }
 
+    @Override
+    public int compareTo(final Sample sample) {
+        return ID.compareTo(sample.getID());
+    }
+
     @Override
     public String toString() {
         return String.format("Sample %s fam=%s dad=%s mom=%s gender=%s affection=%s qt=%s props=%s",
diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java
index 5ba2252e4..2c63f93ff 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java
@@ -4,7 +4,6 @@ import net.sf.samtools.SAMReadGroupRecord;
 import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.utils.exceptions.StingException;
 import org.broadinstitute.sting.utils.variantcontext.Genotype;
-import sun.reflect.generics.reflectiveObjects.NotImplementedException;
 
 import java.util.*;
 
@@ -139,30 +138,45 @@ public class SampleDB {
     //
     // --------------------------------------------------------------------------------
     //
-    public Set<String> getFamilyIDs() {
-        throw new NotImplementedException();
+    /**
+     * Returns a sorted set of the family IDs in all samples (excluding null ids)
+     * @return the key set of the map built by {@link #getFamilies()}
+     */
+    public final Set<String> getFamilyIDs() {
+        return getFamilies().keySet();
     }
 
-    public Map<String, Set<Sample>> getFamilies() {
-        throw new NotImplementedException();
+    /**
+     * Returns a map from family ID -> set of family members for all samples with
+     * non-null family ids
+     *
+     * @return a newly built TreeMap whose values are TreeSets of samples
+     */
+    public final Map<String, Set<Sample>> getFamilies() {
+        final Map<String, Set<Sample>> families = new TreeMap<String, Set<Sample>>();
+
+        for ( final Sample sample : samples.values() ) {
+            final String famID = sample.getFamilyID();
+            if ( famID != null ) {
+                if ( ! families.containsKey(famID) )
+                    families.put(famID, new TreeSet<Sample>());
+                families.get(famID).add(sample);
+            }
+        }
+
+        return families;
     }
 
     /**
      * Return all samples with a given family ID
-     * Note that this isn't terribly efficient (linear) - it may be worth adding a new family ID data structure for this
      * @param familyId
      * @return
      */
     public Set<Sample> getFamily(String familyId) {
-        HashSet<Sample> familyMembers = new HashSet<Sample>();
-
-        for (Sample sample : samples.values()) {
-            if (sample.getFamilyID() != null) {
-                if (sample.getFamilyID().equals(familyId))
-                    familyMembers.add(sample);
-            }
-        }
-        return familyMembers;
+        // A null ID would throw inside TreeMap.get and an unknown ID would yield null;
+        // keep returning an empty set for both, as before.
+        final Set<Sample> family = familyId == null ? null : getFamilies().get(familyId);
+        return family == null ? new HashSet<Sample>() : family;
     }
 
     /**
@@ -172,9 +186,9 @@ public class SampleDB {
      * @return
      */
     public Set<Sample> getChildren(Sample sample) {
-        HashSet<Sample> children = new HashSet<Sample>();
-        for (Sample familyMember : getFamily(sample.getFamilyID())) {
-            if (familyMember.getMother() == sample || familyMember.getFather() == sample) {
+        final HashSet<Sample> children = new HashSet<Sample>();
+        for ( final Sample familyMember : getFamily(sample.getFamilyID())) {
+            if ( familyMember.getMother() == sample || familyMember.getFather() == sample ) {
                 children.add(familyMember);
             }
         }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/samples/SampleDBUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/samples/SampleDBUnitTest.java
index b6b4fab54..d498ee61a 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/samples/SampleDBUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/samples/SampleDBUnitTest.java
@@ -9,9 +9,7 @@ import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 
 import java.io.File;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Set;
+import java.util.*;
 
 /**
  * Created by IntelliJ IDEA.
@@ -36,9 +34,17 @@ public class SampleDBUnitTest extends BaseTest {
 
     private static final String testPEDString =
             String.format("%s%n%s%n%s",
-                          "fam1 kid dad mom 1 2",
-                          "fam1 dad 0 0 1 1",
-                          "fam1 mom 0 0 2 2");
+                    "fam1 kid dad mom 1 2",
+                    "fam1 dad 0 0 1 1",
+                    "fam1 mom 0 0 2 2");
+
+    private static final String testPEDMultipleFamilies =
+            String.format("%s%n%s%n%s%n%s%n%s",
+                    "fam1 kid dad mom 1 2",
+                    "fam1 dad 0 0 1 1",
+                    "fam1 mom 0 0 2 2",
+                    "fam3 s1 d1 m1 2 2",
+                    "fam2 s2 d2 m2 2 2");
 
     private static final String testPEDStringInconsistentGender =
             "fam1 kid 0 0 2 2";
@@ -117,4 +123,35 @@ public class SampleDBUnitTest extends BaseTest {
         builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDStringInconsistentGender));
         builder.getFinalSampleDB();
     }
+
+    @Test()
+    public void getFamilyIDs() {
+        builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));
+        SampleDB db = builder.getFinalSampleDB();
+        Assert.assertEquals(db.getFamilyIDs(), new TreeSet<String>(Arrays.asList("fam1", "fam2", "fam3")));
+    }
+
+    @Test()
+    public void getFamily() {
+        builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));
+        SampleDB db = builder.getFinalSampleDB();
+        Assert.assertEquals(db.getFamily("fam1"), testPEDSamplesAsSet);
+    }
+
+    @Test()
+    public void loadFamilyIDs() {
+        builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));
+        SampleDB db = builder.getFinalSampleDB();
+        Map<String, Set<Sample>> families = db.getFamilies();
+        Assert.assertEquals(families.size(), 3);
+        Assert.assertEquals(families.keySet(), new TreeSet<String>(Arrays.asList("fam1", "fam2", "fam3")));
+
+        for ( final String famID : families.keySet() ) {
+            final Set<Sample> fam = families.get(famID);
+            Assert.assertEquals(fam.size(), 3);
+            for ( final Sample sample : fam ) {
+                Assert.assertEquals(sample.getFamilyID(), famID);
+            }
+        }
+    }
 }