Code improvements re: JIRA GSA-510. Trio class migrated into the Samples package - because the trio structure is so ubiquitously used, it makes sense, I think, to have a class which imposes the structure on the samples. Existing functions which slightly duplicated the getTrios() method look like they have bugs. These functions are now deprecated.
A number of functions int he sampleDB looked to be assuming that samples could not share IDs (e.g. sample IDs are unique, so a sample present in two families could not be represented by multiple Sample objects). Added an assertion in the SampleDBBuilder to document/test this assumption. MVLikelihoodRatio now uses the trio methods from SampleDB.
This commit is contained in:
parent
0996bbd548
commit
b59948709f
|
|
@ -168,13 +168,70 @@ public class SampleDB {
|
||||||
return families;
|
return families;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns all the trios present in the sample database. The strictOneChild parameter determines
|
||||||
|
* whether multiple children of the same parents resolve to multiple trios, or are excluded
|
||||||
|
* @param strictOneChild - exclude pedigrees with >1 child for parental pair
|
||||||
|
* @return - all of the mother+father=child triplets, subject to strictOneChild
|
||||||
|
*/
|
||||||
|
public final Set<Trio> getTrios(boolean strictOneChild) {
|
||||||
|
Set<Trio> trioSet = new HashSet<Trio>();
|
||||||
|
for ( String familyString : getFamilyIDs() ) {
|
||||||
|
Set<Sample> family = getFamily(familyString);
|
||||||
|
for ( Sample sample : family) {
|
||||||
|
if ( sample.getParents().size() == 2 ) {
|
||||||
|
Trio trio = new Trio(sample.getMother(),sample.getFather(),sample);
|
||||||
|
trioSet.add(trio);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( strictOneChild )
|
||||||
|
trioSet = removeTriosWithSameParents(trioSet);
|
||||||
|
|
||||||
|
return trioSet;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns all the trios present in the db. See getTrios(boolean strictOneChild)
|
||||||
|
* @return all the trios present in the samples db.
|
||||||
|
*/
|
||||||
|
public final Set<Trio> getTrios() {
|
||||||
|
return getTrios(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Subsets a set of trios to only those with nonmatching founders. If two (or more) trio objects have
|
||||||
|
* the same mother and father, then both (all) are removed from the returned set.
|
||||||
|
* @param trios - a set of Trio objects
|
||||||
|
* @return those subset of Trio objects in the input set with nonmatching founders
|
||||||
|
*/
|
||||||
|
private Set<Trio> removeTriosWithSameParents(final Set<Trio> trios) {
|
||||||
|
Set<Trio> filteredTrios = new HashSet<Trio>();
|
||||||
|
filteredTrios.addAll(trios);
|
||||||
|
Set<Trio> triosWithSameParents = new HashSet<Trio>();
|
||||||
|
for ( Trio referenceTrio : filteredTrios ) {
|
||||||
|
for ( Trio compareTrio : filteredTrios ) {
|
||||||
|
if ( referenceTrio != compareTrio &&
|
||||||
|
referenceTrio.getFather().equals(compareTrio.getFather()) &&
|
||||||
|
referenceTrio.getMother().equals(compareTrio.getMother()) ) {
|
||||||
|
triosWithSameParents.add(referenceTrio);
|
||||||
|
triosWithSameParents.add(compareTrio);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
filteredTrios.removeAll(triosWithSameParents);
|
||||||
|
return filteredTrios;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the set of all children that have both of their parents.
|
* Returns the set of all children that have both of their parents.
|
||||||
* Note that if a family is composed of more than 1 child, each child is
|
* Note that if a family is composed of more than 1 child, each child is
|
||||||
* returned.
|
* returned.
|
||||||
* @return - all the children that have both of their parents
|
* @return - all the children that have both of their parents
|
||||||
|
* @deprecated - getTrios() replaces this function
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
public final Set<Sample> getChildrenWithParents(){
|
public final Set<Sample> getChildrenWithParents(){
|
||||||
return getChildrenWithParents(false);
|
return getChildrenWithParents(false);
|
||||||
}
|
}
|
||||||
|
|
@ -188,7 +245,15 @@ public class SampleDB {
|
||||||
*
|
*
|
||||||
* @param triosOnly - if set to true, only strict trios are returned
|
* @param triosOnly - if set to true, only strict trios are returned
|
||||||
* @return - all the children that have both of their parents
|
* @return - all the children that have both of their parents
|
||||||
|
* @deprecated - getTrios(boolean strict) replaces this function
|
||||||
|
* @bug -- does not work for extracting multiple generations of trios, e.g.
|
||||||
|
* ..........Mom1------Dad1
|
||||||
|
* ................|
|
||||||
|
* ..............Child1--------Mom2
|
||||||
|
* .......................|
|
||||||
|
* .....................Child2
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
public final Set<Sample> getChildrenWithParents(boolean triosOnly) {
|
public final Set<Sample> getChildrenWithParents(boolean triosOnly) {
|
||||||
|
|
||||||
Map<String, Set<Sample>> families = getFamilies();
|
Map<String, Set<Sample>> families = getFamilies();
|
||||||
|
|
|
||||||
|
|
@ -135,9 +135,8 @@ public class SampleDBBuilder {
|
||||||
// --------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
protected final void validate() {
|
protected final void validate() {
|
||||||
if ( validationStrictness == PedigreeValidationType.SILENT )
|
validatePedigreeIDUniqueness();
|
||||||
return;
|
if ( validationStrictness != PedigreeValidationType.SILENT ) {
|
||||||
else {
|
|
||||||
// check that samples in data sources are all annotated, if anything is annotated
|
// check that samples in data sources are all annotated, if anything is annotated
|
||||||
if ( ! samplesFromPedigrees.isEmpty() && ! samplesFromDataSources.isEmpty() ) {
|
if ( ! samplesFromPedigrees.isEmpty() && ! samplesFromDataSources.isEmpty() ) {
|
||||||
final Set<String> sampleNamesFromPedigrees = new HashSet<String>();
|
final Set<String> sampleNamesFromPedigrees = new HashSet<String>();
|
||||||
|
|
@ -150,4 +149,12 @@ public class SampleDBBuilder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void validatePedigreeIDUniqueness() {
|
||||||
|
Set<String> pedigreeIDs = new HashSet<String>();
|
||||||
|
for ( Sample sample : samplesFromPedigrees ) {
|
||||||
|
pedigreeIDs.add(sample.getID());
|
||||||
|
}
|
||||||
|
assert pedigreeIDs.size() == samplesFromPedigrees.size() : "The number of sample IDs extracted from the pedigree does not equal the number of samples in the pedigree. Is a sample associated with multiple families?";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,45 @@
|
||||||
|
package org.broadinstitute.sting.gatk.samples;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A class for imposing a trio structure on three samples; a common paradigm
|
||||||
|
*
|
||||||
|
* todo -- there should probably be an interface or abstract class "Pedigree" that generalizes the notion of
|
||||||
|
* -- imposing structure on samples. But given how complex pedigrees can quickly become, it's not
|
||||||
|
* -- clear the best way to do this.
|
||||||
|
*/
|
||||||
|
public class Trio {
|
||||||
|
private Sample mother;
|
||||||
|
private Sample father;
|
||||||
|
private Sample child;
|
||||||
|
|
||||||
|
public Trio(Sample mom, Sample dad, Sample spawn) {
|
||||||
|
assert mom.getID().equals(spawn.getMaternalID()) && dad.getID().equals(spawn.getPaternalID()) : "Samples passed to trio constructor do not form a trio";
|
||||||
|
mother = mom;
|
||||||
|
father = dad;
|
||||||
|
child = spawn;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Sample getMother() {
|
||||||
|
return mother;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getMaternalID() {
|
||||||
|
return mother.getID();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Sample getFather() {
|
||||||
|
return father;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getPaternalID() {
|
||||||
|
return father.getID();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Sample getChild() {
|
||||||
|
return child;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getChildID() {
|
||||||
|
return child.getID();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -3,8 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
import org.broadinstitute.sting.gatk.samples.Trio;
|
||||||
import org.broadinstitute.sting.gatk.samples.SampleDB;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||||
|
|
@ -39,7 +38,7 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
|
||||||
final VariantContext vc,
|
final VariantContext vc,
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
if ( mendelianViolation == null ) {
|
if ( mendelianViolation == null ) {
|
||||||
trios = checkAndSetSamples(((Walker) walker).getSampleDB());
|
trios = ((Walker) walker).getSampleDB().getTrios();
|
||||||
if ( trios.size() > 0 ) {
|
if ( trios.size() > 0 ) {
|
||||||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
||||||
}
|
}
|
||||||
|
|
@ -53,7 +52,7 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
|
||||||
double maxMVLR = Double.MIN_VALUE;
|
double maxMVLR = Double.MIN_VALUE;
|
||||||
for ( Trio trio : trios ) {
|
for ( Trio trio : trios ) {
|
||||||
if ( contextHasTrioLikelihoods(vc,trio) ) {
|
if ( contextHasTrioLikelihoods(vc,trio) ) {
|
||||||
Double likR = mendelianViolation.violationLikelihoodRatio(vc,trio.getMaternalID(),trio.getPaternalID(),trio.childId);
|
Double likR = mendelianViolation.violationLikelihoodRatio(vc,trio.getMaternalID(),trio.getPaternalID(),trio.getChildID());
|
||||||
maxMVLR = likR > maxMVLR ? likR : maxMVLR;
|
maxMVLR = likR > maxMVLR ? likR : maxMVLR;
|
||||||
//pNoMV *= (1.0-Math.pow(10.0,likR)/(1+Math.pow(10.0,likR)));
|
//pNoMV *= (1.0-Math.pow(10.0,likR)/(1+Math.pow(10.0,likR)));
|
||||||
}
|
}
|
||||||
|
|
@ -71,24 +70,9 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
|
||||||
|
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(MVLR_KEY, 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
|
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(MVLR_KEY, 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
|
||||||
|
|
||||||
// todo - this entire function should be in samples DB
|
|
||||||
private Set<Trio> checkAndSetSamples(SampleDB db){
|
|
||||||
Set<Trio> trioSet = new HashSet<Trio>();
|
|
||||||
for ( String familyString : db.getFamilyIDs() ) {
|
|
||||||
Set<Sample> family = db.getFamily(familyString);
|
|
||||||
for ( Sample sample : family) {
|
|
||||||
if ( sample.getParents().size() == 2 ) {
|
|
||||||
Trio trio = new Trio(sample.getMaternalID(),sample.getPaternalID(),sample.getID());
|
|
||||||
trioSet.add(trio);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return trioSet;
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean contextHasTrioLikelihoods(VariantContext context, Trio trio) {
|
private boolean contextHasTrioLikelihoods(VariantContext context, Trio trio) {
|
||||||
for ( String sample : trio ) {
|
for ( String sample : Arrays.asList(trio.getMaternalID(),trio.getPaternalID(),trio.getChildID()) ) {
|
||||||
if ( ! context.hasGenotype(sample) )
|
if ( ! context.hasGenotype(sample) )
|
||||||
return false;
|
return false;
|
||||||
if ( ! context.getGenotype(sample).hasLikelihoods() )
|
if ( ! context.getGenotype(sample).hasLikelihoods() )
|
||||||
|
|
@ -98,47 +82,4 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO -- this class is too much.
|
|
||||||
// TODO -- Why iterable?
|
|
||||||
// TODO -- shuoldn't this be in samplesDB() so you can just called samplesDB().getTrios()
|
|
||||||
// TODO -- should just have final string IDs, and getters, no setters
|
|
||||||
private class Trio implements Iterable<String> {
|
|
||||||
private String maternalID;
|
|
||||||
private String paternalID;
|
|
||||||
private String childId;
|
|
||||||
|
|
||||||
public Trio(String mom, String dad, String child) {
|
|
||||||
this.maternalID = mom;
|
|
||||||
this.paternalID = dad;
|
|
||||||
this.childId = child;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getMaternalID() {
|
|
||||||
return this.maternalID;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getPaternalID() {
|
|
||||||
return this.paternalID;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getChildId() {
|
|
||||||
return this.childId;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setMaternalID(String id) {
|
|
||||||
this.maternalID = id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setPaternalID(String id) {
|
|
||||||
this.paternalID = id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setChildId(String id) {
|
|
||||||
this.childId = id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Iterator<String> iterator() {
|
|
||||||
return Arrays.asList(maternalID,paternalID,childId).iterator();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue