diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java index 31149cd8a..3de85028f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java @@ -168,13 +168,70 @@ public class SampleDB { return families; } + /** + * Returns all the trios present in the sample database. The strictOneChild parameter determines + * whether multiple children of the same parents resolve to multiple trios, or are excluded + * @param strictOneChild - exclude pedigrees with >1 child for parental pair + * @return - all of the mother+father=child triplets, subject to strictOneChild + */ + public final Set getTrios(boolean strictOneChild) { + Set trioSet = new HashSet(); + for ( String familyString : getFamilyIDs() ) { + Set family = getFamily(familyString); + for ( Sample sample : family) { + if ( sample.getParents().size() == 2 ) { + Trio trio = new Trio(sample.getMother(),sample.getFather(),sample); + trioSet.add(trio); + } + } + } + + if ( strictOneChild ) + trioSet = removeTriosWithSameParents(trioSet); + + return trioSet; + } + + /** + * Returns all the trios present in the db. See getTrios(boolean strictOneChild) + * @return all the trios present in the samples db. + */ + public final Set getTrios() { + return getTrios(false); + } + + /** + * Subsets a set of trios to only those with nonmatching founders. If two (or more) trio objects have + * the same mother and father, then both (all) are removed from the returned set. + * @param trios - a set of Trio objects + * @return those subset of Trio objects in the input set with nonmatching founders + */ + private Set removeTriosWithSameParents(final Set trios) { + Set filteredTrios = new HashSet(); + filteredTrios.addAll(trios); + Set triosWithSameParents = new HashSet(); + for ( Trio referenceTrio : filteredTrios ) { + for ( Trio compareTrio : filteredTrios ) { + if ( referenceTrio != compareTrio && + referenceTrio.getFather().equals(compareTrio.getFather()) && + referenceTrio.getMother().equals(compareTrio.getMother()) ) { + triosWithSameParents.add(referenceTrio); + triosWithSameParents.add(compareTrio); + } + } + } + filteredTrios.removeAll(triosWithSameParents); + return filteredTrios; + } /** * Returns the set of all children that have both of their parents. * Note that if a family is composed of more than 1 child, each child is * returned. * @return - all the children that have both of their parents + * @deprecated - getTrios() replaces this function */ + @Deprecated public final Set getChildrenWithParents(){ return getChildrenWithParents(false); } @@ -188,7 +245,15 @@ public class SampleDB { * * @param triosOnly - if set to true, only strict trios are returned * @return - all the children that have both of their parents + * @deprecated - getTrios(boolean strict) replaces this function + * @bug -- does not work for extracting multiple generations of trios, e.g. + * ..........Mom1------Dad1 + * ................| + * ..............Child1--------Mom2 + * .......................| + * .....................Child2 */ + @Deprecated public final Set getChildrenWithParents(boolean triosOnly) { Map> families = getFamilies(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDBBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDBBuilder.java index 44a8600b0..612e342db 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDBBuilder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDBBuilder.java @@ -135,9 +135,8 @@ public class SampleDBBuilder { // -------------------------------------------------------------------------------- protected final void validate() { - if ( validationStrictness == PedigreeValidationType.SILENT ) - return; - else { + validatePedigreeIDUniqueness(); + if ( validationStrictness != PedigreeValidationType.SILENT ) { // check that samples in data sources are all annotated, if anything is annotated if ( ! samplesFromPedigrees.isEmpty() && ! samplesFromDataSources.isEmpty() ) { final Set sampleNamesFromPedigrees = new HashSet(); @@ -150,4 +149,12 @@ public class SampleDBBuilder { } } } + + private void validatePedigreeIDUniqueness() { + Set pedigreeIDs = new HashSet(); + for ( Sample sample : samplesFromPedigrees ) { + pedigreeIDs.add(sample.getID()); + } + assert pedigreeIDs.size() == samplesFromPedigrees.size() : "The number of sample IDs extracted from the pedigree does not equal the number of samples in the pedigree. Is a sample associated with multiple families?"; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/Trio.java b/public/java/src/org/broadinstitute/sting/gatk/samples/Trio.java new file mode 100644 index 000000000..314baad3d --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/Trio.java @@ -0,0 +1,45 @@ +package org.broadinstitute.sting.gatk.samples; + +/** + * A class for imposing a trio structure on three samples; a common paradigm + * + * todo -- there should probably be an interface or abstract class "Pedigree" that generalizes the notion of + * -- imposing structure on samples. But given how complex pedigrees can quickly become, it's not + * -- clear the best way to do this. + */ +public class Trio { + private Sample mother; + private Sample father; + private Sample child; + + public Trio(Sample mom, Sample dad, Sample spawn) { + assert mom.getID().equals(spawn.getMaternalID()) && dad.getID().equals(spawn.getPaternalID()) : "Samples passed to trio constructor do not form a trio"; + mother = mom; + father = dad; + child = spawn; + } + + public Sample getMother() { + return mother; + } + + public String getMaternalID() { + return mother.getID(); + } + + public Sample getFather() { + return father; + } + + public String getPaternalID() { + return father.getID(); + } + + public Sample getChild() { + return child; + } + + public String getChildID() { + return child.getID(); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java index d6cf50522..f644c4c6d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java @@ -3,8 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.samples.Sample; -import org.broadinstitute.sting.gatk.samples.SampleDB; +import org.broadinstitute.sting.gatk.samples.Trio; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; @@ -39,7 +38,7 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment final VariantContext vc, final Map stratifiedPerReadAlleleLikelihoodMap) { if ( mendelianViolation == null ) { - trios = checkAndSetSamples(((Walker) walker).getSampleDB()); + trios = ((Walker) walker).getSampleDB().getTrios(); if ( trios.size() > 0 ) { mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP ); } @@ -53,7 +52,7 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment double maxMVLR = Double.MIN_VALUE; for ( Trio trio : trios ) { if ( contextHasTrioLikelihoods(vc,trio) ) { - Double likR = mendelianViolation.violationLikelihoodRatio(vc,trio.getMaternalID(),trio.getPaternalID(),trio.childId); + Double likR = mendelianViolation.violationLikelihoodRatio(vc,trio.getMaternalID(),trio.getPaternalID(),trio.getChildID()); maxMVLR = likR > maxMVLR ? likR : maxMVLR; //pNoMV *= (1.0-Math.pow(10.0,likR)/(1+Math.pow(10.0,likR))); } @@ -71,24 +70,9 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(MVLR_KEY, 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); } - // todo - this entire function should be in samples DB - private Set checkAndSetSamples(SampleDB db){ - Set trioSet = new HashSet(); - for ( String familyString : db.getFamilyIDs() ) { - Set family = db.getFamily(familyString); - for ( Sample sample : family) { - if ( sample.getParents().size() == 2 ) { - Trio trio = new Trio(sample.getMaternalID(),sample.getPaternalID(),sample.getID()); - trioSet.add(trio); - } - } - } - - return trioSet; - } private boolean contextHasTrioLikelihoods(VariantContext context, Trio trio) { - for ( String sample : trio ) { + for ( String sample : Arrays.asList(trio.getMaternalID(),trio.getPaternalID(),trio.getChildID()) ) { if ( ! context.hasGenotype(sample) ) return false; if ( ! context.getGenotype(sample).hasLikelihoods() ) @@ -98,47 +82,4 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment return true; } - // TODO -- this class is too much. - // TODO -- Why iterable? - // TODO -- shuoldn't this be in samplesDB() so you can just called samplesDB().getTrios() - // TODO -- should just have final string IDs, and getters, no setters - private class Trio implements Iterable { - private String maternalID; - private String paternalID; - private String childId; - - public Trio(String mom, String dad, String child) { - this.maternalID = mom; - this.paternalID = dad; - this.childId = child; - } - - public String getMaternalID() { - return this.maternalID; - } - - public String getPaternalID() { - return this.paternalID; - } - - public String getChildId() { - return this.childId; - } - - public void setMaternalID(String id) { - this.maternalID = id; - } - - public void setPaternalID(String id) { - this.paternalID = id; - } - - public void setChildId(String id) { - this.childId = id; - } - - public Iterator iterator() { - return Arrays.asList(maternalID,paternalID,childId).iterator(); - } - } }