Code improvements re: JIRA GSA-510. Trio class migrated into the Samples package - because the trio structure is so ubiquitously used, it makes sense, I think, to have a class which imposes the structure on the samples. Existing functions which slightly duplicated the getTrios() method look like they have bugs. These functions are now deprecated.
A number of functions int he sampleDB looked to be assuming that samples could not share IDs (e.g. sample IDs are unique, so a sample present in two families could not be represented by multiple Sample objects). Added an assertion in the SampleDBBuilder to document/test this assumption. MVLikelihoodRatio now uses the trio methods from SampleDB.
This commit is contained in:
parent
0996bbd548
commit
b59948709f
|
|
@ -168,13 +168,70 @@ public class SampleDB {
|
|||
return families;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all the trios present in the sample database. The strictOneChild parameter determines
|
||||
* whether multiple children of the same parents resolve to multiple trios, or are excluded
|
||||
* @param strictOneChild - exclude pedigrees with >1 child for parental pair
|
||||
* @return - all of the mother+father=child triplets, subject to strictOneChild
|
||||
*/
|
||||
public final Set<Trio> getTrios(boolean strictOneChild) {
|
||||
Set<Trio> trioSet = new HashSet<Trio>();
|
||||
for ( String familyString : getFamilyIDs() ) {
|
||||
Set<Sample> family = getFamily(familyString);
|
||||
for ( Sample sample : family) {
|
||||
if ( sample.getParents().size() == 2 ) {
|
||||
Trio trio = new Trio(sample.getMother(),sample.getFather(),sample);
|
||||
trioSet.add(trio);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( strictOneChild )
|
||||
trioSet = removeTriosWithSameParents(trioSet);
|
||||
|
||||
return trioSet;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all the trios present in the db. See getTrios(boolean strictOneChild)
|
||||
* @return all the trios present in the samples db.
|
||||
*/
|
||||
public final Set<Trio> getTrios() {
|
||||
return getTrios(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Subsets a set of trios to only those with nonmatching founders. If two (or more) trio objects have
|
||||
* the same mother and father, then both (all) are removed from the returned set.
|
||||
* @param trios - a set of Trio objects
|
||||
* @return those subset of Trio objects in the input set with nonmatching founders
|
||||
*/
|
||||
private Set<Trio> removeTriosWithSameParents(final Set<Trio> trios) {
|
||||
Set<Trio> filteredTrios = new HashSet<Trio>();
|
||||
filteredTrios.addAll(trios);
|
||||
Set<Trio> triosWithSameParents = new HashSet<Trio>();
|
||||
for ( Trio referenceTrio : filteredTrios ) {
|
||||
for ( Trio compareTrio : filteredTrios ) {
|
||||
if ( referenceTrio != compareTrio &&
|
||||
referenceTrio.getFather().equals(compareTrio.getFather()) &&
|
||||
referenceTrio.getMother().equals(compareTrio.getMother()) ) {
|
||||
triosWithSameParents.add(referenceTrio);
|
||||
triosWithSameParents.add(compareTrio);
|
||||
}
|
||||
}
|
||||
}
|
||||
filteredTrios.removeAll(triosWithSameParents);
|
||||
return filteredTrios;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the set of all children that have both of their parents.
|
||||
* Note that if a family is composed of more than 1 child, each child is
|
||||
* returned.
|
||||
* @return - all the children that have both of their parents
|
||||
* @deprecated - getTrios() replaces this function
|
||||
*/
|
||||
@Deprecated
|
||||
public final Set<Sample> getChildrenWithParents(){
|
||||
return getChildrenWithParents(false);
|
||||
}
|
||||
|
|
@ -188,7 +245,15 @@ public class SampleDB {
|
|||
*
|
||||
* @param triosOnly - if set to true, only strict trios are returned
|
||||
* @return - all the children that have both of their parents
|
||||
* @deprecated - getTrios(boolean strict) replaces this function
|
||||
* @bug -- does not work for extracting multiple generations of trios, e.g.
|
||||
* ..........Mom1------Dad1
|
||||
* ................|
|
||||
* ..............Child1--------Mom2
|
||||
* .......................|
|
||||
* .....................Child2
|
||||
*/
|
||||
@Deprecated
|
||||
public final Set<Sample> getChildrenWithParents(boolean triosOnly) {
|
||||
|
||||
Map<String, Set<Sample>> families = getFamilies();
|
||||
|
|
|
|||
|
|
@ -135,9 +135,8 @@ public class SampleDBBuilder {
|
|||
// --------------------------------------------------------------------------------
|
||||
|
||||
protected final void validate() {
|
||||
if ( validationStrictness == PedigreeValidationType.SILENT )
|
||||
return;
|
||||
else {
|
||||
validatePedigreeIDUniqueness();
|
||||
if ( validationStrictness != PedigreeValidationType.SILENT ) {
|
||||
// check that samples in data sources are all annotated, if anything is annotated
|
||||
if ( ! samplesFromPedigrees.isEmpty() && ! samplesFromDataSources.isEmpty() ) {
|
||||
final Set<String> sampleNamesFromPedigrees = new HashSet<String>();
|
||||
|
|
@ -150,4 +149,12 @@ public class SampleDBBuilder {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void validatePedigreeIDUniqueness() {
|
||||
Set<String> pedigreeIDs = new HashSet<String>();
|
||||
for ( Sample sample : samplesFromPedigrees ) {
|
||||
pedigreeIDs.add(sample.getID());
|
||||
}
|
||||
assert pedigreeIDs.size() == samplesFromPedigrees.size() : "The number of sample IDs extracted from the pedigree does not equal the number of samples in the pedigree. Is a sample associated with multiple families?";
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,45 @@
|
|||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
/**
|
||||
* A class for imposing a trio structure on three samples; a common paradigm
|
||||
*
|
||||
* todo -- there should probably be an interface or abstract class "Pedigree" that generalizes the notion of
|
||||
* -- imposing structure on samples. But given how complex pedigrees can quickly become, it's not
|
||||
* -- clear the best way to do this.
|
||||
*/
|
||||
public class Trio {
|
||||
private Sample mother;
|
||||
private Sample father;
|
||||
private Sample child;
|
||||
|
||||
public Trio(Sample mom, Sample dad, Sample spawn) {
|
||||
assert mom.getID().equals(spawn.getMaternalID()) && dad.getID().equals(spawn.getPaternalID()) : "Samples passed to trio constructor do not form a trio";
|
||||
mother = mom;
|
||||
father = dad;
|
||||
child = spawn;
|
||||
}
|
||||
|
||||
public Sample getMother() {
|
||||
return mother;
|
||||
}
|
||||
|
||||
public String getMaternalID() {
|
||||
return mother.getID();
|
||||
}
|
||||
|
||||
public Sample getFather() {
|
||||
return father;
|
||||
}
|
||||
|
||||
public String getPaternalID() {
|
||||
return father.getID();
|
||||
}
|
||||
|
||||
public Sample getChild() {
|
||||
return child;
|
||||
}
|
||||
|
||||
public String getChildID() {
|
||||
return child.getID();
|
||||
}
|
||||
}
|
||||
|
|
@ -3,8 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.samples.SampleDB;
|
||||
import org.broadinstitute.sting.gatk.samples.Trio;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
|
|
@ -39,7 +38,7 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
|
|||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if ( mendelianViolation == null ) {
|
||||
trios = checkAndSetSamples(((Walker) walker).getSampleDB());
|
||||
trios = ((Walker) walker).getSampleDB().getTrios();
|
||||
if ( trios.size() > 0 ) {
|
||||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
||||
}
|
||||
|
|
@ -53,7 +52,7 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
|
|||
double maxMVLR = Double.MIN_VALUE;
|
||||
for ( Trio trio : trios ) {
|
||||
if ( contextHasTrioLikelihoods(vc,trio) ) {
|
||||
Double likR = mendelianViolation.violationLikelihoodRatio(vc,trio.getMaternalID(),trio.getPaternalID(),trio.childId);
|
||||
Double likR = mendelianViolation.violationLikelihoodRatio(vc,trio.getMaternalID(),trio.getPaternalID(),trio.getChildID());
|
||||
maxMVLR = likR > maxMVLR ? likR : maxMVLR;
|
||||
//pNoMV *= (1.0-Math.pow(10.0,likR)/(1+Math.pow(10.0,likR)));
|
||||
}
|
||||
|
|
@ -71,24 +70,9 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
|
|||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(MVLR_KEY, 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
|
||||
|
||||
// todo - this entire function should be in samples DB
|
||||
private Set<Trio> checkAndSetSamples(SampleDB db){
|
||||
Set<Trio> trioSet = new HashSet<Trio>();
|
||||
for ( String familyString : db.getFamilyIDs() ) {
|
||||
Set<Sample> family = db.getFamily(familyString);
|
||||
for ( Sample sample : family) {
|
||||
if ( sample.getParents().size() == 2 ) {
|
||||
Trio trio = new Trio(sample.getMaternalID(),sample.getPaternalID(),sample.getID());
|
||||
trioSet.add(trio);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return trioSet;
|
||||
}
|
||||
|
||||
private boolean contextHasTrioLikelihoods(VariantContext context, Trio trio) {
|
||||
for ( String sample : trio ) {
|
||||
for ( String sample : Arrays.asList(trio.getMaternalID(),trio.getPaternalID(),trio.getChildID()) ) {
|
||||
if ( ! context.hasGenotype(sample) )
|
||||
return false;
|
||||
if ( ! context.getGenotype(sample).hasLikelihoods() )
|
||||
|
|
@ -98,47 +82,4 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
|
|||
return true;
|
||||
}
|
||||
|
||||
// TODO -- this class is too much.
|
||||
// TODO -- Why iterable?
|
||||
// TODO -- shuoldn't this be in samplesDB() so you can just called samplesDB().getTrios()
|
||||
// TODO -- should just have final string IDs, and getters, no setters
|
||||
private class Trio implements Iterable<String> {
|
||||
private String maternalID;
|
||||
private String paternalID;
|
||||
private String childId;
|
||||
|
||||
public Trio(String mom, String dad, String child) {
|
||||
this.maternalID = mom;
|
||||
this.paternalID = dad;
|
||||
this.childId = child;
|
||||
}
|
||||
|
||||
public String getMaternalID() {
|
||||
return this.maternalID;
|
||||
}
|
||||
|
||||
public String getPaternalID() {
|
||||
return this.paternalID;
|
||||
}
|
||||
|
||||
public String getChildId() {
|
||||
return this.childId;
|
||||
}
|
||||
|
||||
public void setMaternalID(String id) {
|
||||
this.maternalID = id;
|
||||
}
|
||||
|
||||
public void setPaternalID(String id) {
|
||||
this.paternalID = id;
|
||||
}
|
||||
|
||||
public void setChildId(String id) {
|
||||
this.childId = id;
|
||||
}
|
||||
|
||||
public Iterator<String> iterator() {
|
||||
return Arrays.asList(maternalID,paternalID,childId).iterator();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue