2011-03-15 00:43:37 +08:00
package org.broadinstitute.sting.utils ;
2011-09-29 21:57:15 +08:00
import org.broadinstitute.sting.gatk.samples.Sample ;
2011-07-18 08:29:58 +08:00
import org.broadinstitute.sting.utils.variantcontext.Genotype ;
import org.broadinstitute.sting.utils.variantcontext.VariantContext ;
2011-03-15 00:43:37 +08:00
2011-11-29 00:13:14 +08:00
import java.util.* ;
2011-03-15 00:43:37 +08:00
/ * *
2011-11-29 00:13:14 +08:00
* User : carneiro / lfran
2011-03-15 00:43:37 +08:00
* Date : 3 / 9 / 11
* Time : 12 : 38 PM
2011-11-29 00:13:14 +08:00
*
* Class for the identification and tracking of mendelian violation . It can be used in 2 distinct ways :
* - Either using an instance of the MendelianViolation class to track mendelian violations for each of the families while
* walking over the variants
* - Or using the static methods to directly get information about mendelian violation in a family at a given locus
*
2011-03-15 00:43:37 +08:00
* /
public class MendelianViolation {
2011-11-29 00:13:14 +08:00
//List of families with violations
private List < String > violationFamilies ;
2011-03-15 00:43:37 +08:00
2011-11-29 00:13:14 +08:00
//Call information
private int nocall = 0 ;
private int familyCalled = 0 ;
private int varFamilyCalled = 0 ;
private int lowQual = 0 ;
2011-03-15 00:43:37 +08:00
2011-11-29 00:13:14 +08:00
private boolean allCalledOnly = true ;
2011-03-15 00:43:37 +08:00
2011-11-29 00:13:14 +08:00
//Stores occurrences of inheritance
private EnumMap < Genotype . Type , EnumMap < Genotype . Type , EnumMap < Genotype . Type , Integer > > > inheritance ;
private int violations_total = 0 ;
private double minGenotypeQuality ;
private boolean abortOnSampleNotFound ;
//Number of families with genotype information for all members
public int getFamilyCalledCount ( ) {
return familyCalled ;
}
//Number of families with genotype information for all members
public int getVarFamilyCalledCount ( ) {
return varFamilyCalled ;
}
//Number of families missing genotypes for one or more of their members
public int getFamilyNoCallCount ( ) {
return nocall ;
}
//Number of families with genotypes below the set quality threshold
public int getFamilyLowQualsCount ( ) {
return lowQual ;
}
public int getViolationsCount ( ) {
return violations_total ;
}
//Count of alt alleles inherited from het parents (no violation)
public int getParentHetInheritedVar ( ) {
return getParentsHetHetInheritedVar ( ) + getParentsRefHetInheritedVar ( ) + getParentsVarHetInheritedVar ( ) ;
}
//Count of ref alleles inherited from het parents (no violation)
public int getParentHetInheritedRef ( ) {
return getParentsHetHetInheritedRef ( ) + getParentsRefHetInheritedRef ( ) + getParentsVarHetInheritedRef ( ) ;
}
//Count of HomRef/HomRef/HomRef trios
public int getRefRefRef ( ) {
return inheritance . get ( Genotype . Type . HOM_REF ) . get ( Genotype . Type . HOM_REF ) . get ( Genotype . Type . HOM_REF ) ;
}
//Count of HomVar/HomVar/HomVar trios
public int getVarVarVar ( ) {
return inheritance . get ( Genotype . Type . HOM_VAR ) . get ( Genotype . Type . HOM_VAR ) . get ( Genotype . Type . HOM_VAR ) ;
}
//Count of HomRef/HomVar/Het trios
public int getRefVarHet ( ) {
return inheritance . get ( Genotype . Type . HOM_REF ) . get ( Genotype . Type . HOM_VAR ) . get ( Genotype . Type . HET ) +
inheritance . get ( Genotype . Type . HOM_VAR ) . get ( Genotype . Type . HOM_REF ) . get ( Genotype . Type . HET ) ;
}
//Count of Het/Het/Het trios
public int getHetHetHet ( ) {
return inheritance . get ( Genotype . Type . HET ) . get ( Genotype . Type . HET ) . get ( Genotype . Type . HET ) ;
}
//Count of Het/Het/HomRef trios
public int getHetHetHomRef ( ) {
return inheritance . get ( Genotype . Type . HET ) . get ( Genotype . Type . HET ) . get ( Genotype . Type . HOM_REF ) ;
}
//Count of Het/Het/HomVar trios
public int getHetHetHomVar ( ) {
return inheritance . get ( Genotype . Type . HET ) . get ( Genotype . Type . HET ) . get ( Genotype . Type . HOM_VAR ) ;
}
//Count of ref alleles inherited from Het/Het parents (no violation)
public int getParentsHetHetInheritedRef ( ) {
return inheritance . get ( Genotype . Type . HET ) . get ( Genotype . Type . HET ) . get ( Genotype . Type . HET )
+ 2 * inheritance . get ( Genotype . Type . HET ) . get ( Genotype . Type . HET ) . get ( Genotype . Type . HOM_REF ) ;
//return parentsHetHet_childRef;
}
//Count of var alleles inherited from Het/Het parents (no violation)
public int getParentsHetHetInheritedVar ( ) {
return inheritance . get ( Genotype . Type . HET ) . get ( Genotype . Type . HET ) . get ( Genotype . Type . HET )
+ 2 * inheritance . get ( Genotype . Type . HET ) . get ( Genotype . Type . HET ) . get ( Genotype . Type . HOM_VAR ) ;
//return parentsHetHet_childVar;
}
//Count of ref alleles inherited from HomRef/Het parents (no violation)
public int getParentsRefHetInheritedRef ( ) {
return inheritance . get ( Genotype . Type . HOM_REF ) . get ( Genotype . Type . HET ) . get ( Genotype . Type . HOM_REF )
+ inheritance . get ( Genotype . Type . HET ) . get ( Genotype . Type . HOM_REF ) . get ( Genotype . Type . HOM_REF ) ;
//return parentsHomRefHet_childRef;
}
//Count of var alleles inherited from HomRef/Het parents (no violation)
public int getParentsRefHetInheritedVar ( ) {
return inheritance . get ( Genotype . Type . HOM_REF ) . get ( Genotype . Type . HET ) . get ( Genotype . Type . HET )
+ inheritance . get ( Genotype . Type . HET ) . get ( Genotype . Type . HOM_REF ) . get ( Genotype . Type . HET ) ;
//return parentsHomRefHet_childVar;
}
//Count of ref alleles inherited from HomVar/Het parents (no violation)
public int getParentsVarHetInheritedRef ( ) {
return inheritance . get ( Genotype . Type . HOM_VAR ) . get ( Genotype . Type . HET ) . get ( Genotype . Type . HET )
+ inheritance . get ( Genotype . Type . HET ) . get ( Genotype . Type . HOM_VAR ) . get ( Genotype . Type . HET ) ;
//return parentsHomVarHet_childRef;
}
//Count of var alleles inherited from HomVar/Het parents (no violation)
public int getParentsVarHetInheritedVar ( ) {
return inheritance . get ( Genotype . Type . HOM_VAR ) . get ( Genotype . Type . HET ) . get ( Genotype . Type . HOM_VAR )
+ inheritance . get ( Genotype . Type . HET ) . get ( Genotype . Type . HOM_VAR ) . get ( Genotype . Type . HOM_VAR ) ;
//return parentsHomVarHet_childVar;
}
//Count of violations of the type HOM_REF/HOM_REF -> HOM_VAR
public int getParentsRefRefChildVar ( ) {
return inheritance . get ( Genotype . Type . HOM_REF ) . get ( Genotype . Type . HOM_REF ) . get ( Genotype . Type . HOM_VAR ) ;
}
//Count of violations of the type HOM_REF/HOM_REF -> HET
public int getParentsRefRefChildHet ( ) {
return inheritance . get ( Genotype . Type . HOM_REF ) . get ( Genotype . Type . HOM_REF ) . get ( Genotype . Type . HET ) ;
}
//Count of violations of the type HOM_REF/HET -> HOM_VAR
public int getParentsRefHetChildVar ( ) {
return inheritance . get ( Genotype . Type . HOM_REF ) . get ( Genotype . Type . HET ) . get ( Genotype . Type . HOM_VAR )
+ inheritance . get ( Genotype . Type . HET ) . get ( Genotype . Type . HOM_REF ) . get ( Genotype . Type . HOM_VAR ) ;
}
2011-10-20 05:42:37 +08:00
2011-11-29 00:13:14 +08:00
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_VAR
public int getParentsRefVarChildVar ( ) {
return inheritance . get ( Genotype . Type . HOM_REF ) . get ( Genotype . Type . HOM_VAR ) . get ( Genotype . Type . HOM_VAR )
+ inheritance . get ( Genotype . Type . HOM_VAR ) . get ( Genotype . Type . HOM_REF ) . get ( Genotype . Type . HOM_VAR ) ;
}
2011-03-15 00:43:37 +08:00
2011-11-29 00:13:14 +08:00
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_REF
public int getParentsRefVarChildRef ( ) {
return inheritance . get ( Genotype . Type . HOM_REF ) . get ( Genotype . Type . HOM_VAR ) . get ( Genotype . Type . HOM_REF )
+ inheritance . get ( Genotype . Type . HOM_VAR ) . get ( Genotype . Type . HOM_REF ) . get ( Genotype . Type . HOM_REF ) ;
2011-03-16 02:36:55 +08:00
}
2011-11-29 00:13:14 +08:00
//Count of violations of the type HOM_VAR/HET -> HOM_REF
public int getParentsVarHetChildRef ( ) {
return inheritance . get ( Genotype . Type . HET ) . get ( Genotype . Type . HOM_VAR ) . get ( Genotype . Type . HOM_REF )
+ inheritance . get ( Genotype . Type . HOM_VAR ) . get ( Genotype . Type . HET ) . get ( Genotype . Type . HOM_REF ) ;
2011-03-16 02:36:55 +08:00
}
2011-11-29 00:13:14 +08:00
//Count of violations of the type HOM_VAR/HOM_VAR -> HOM_REF
public int getParentsVarVarChildRef ( ) {
return inheritance . get ( Genotype . Type . HOM_VAR ) . get ( Genotype . Type . HOM_VAR ) . get ( Genotype . Type . HOM_REF ) ;
2011-03-16 02:36:55 +08:00
}
2011-11-29 00:13:14 +08:00
//Count of violations of the type HOM_VAR/HOM_VAR -> HET
public int getParentsVarVarChildHet ( ) {
return inheritance . get ( Genotype . Type . HOM_VAR ) . get ( Genotype . Type . HOM_VAR ) . get ( Genotype . Type . HET ) ;
}
//Count of violations of the type HOM_VAR/? -> HOM_REF
public int getParentVarChildRef ( ) {
return getParentsRefVarChildRef ( ) + getParentsVarHetChildRef ( ) + getParentsVarVarChildRef ( ) ;
}
//Count of violations of the type HOM_REF/? -> HOM_VAR
public int getParentRefChildVar ( ) {
return getParentsRefVarChildVar ( ) + getParentsRefHetChildVar ( ) + getParentsRefRefChildVar ( ) ;
}
//Returns a String containing all trios where a Mendelian violation was observed.
//The String is formatted "mom1+dad1=child1,mom2+dad2=child2,..."
public String getViolationFamiliesString ( ) {
if ( violationFamilies . isEmpty ( ) )
return "" ;
Iterator < String > it = violationFamilies . iterator ( ) ;
String violationFams = it . next ( ) ;
while ( it . hasNext ( ) ) {
violationFams + = "," + it . next ( ) ;
}
return violationFams ;
}
public List < String > getViolationFamilies ( ) {
return violationFamilies ;
}
static final int [ ] mvOffsets = new int [ ] { 1 , 2 , 5 , 6 , 8 , 11 , 15 , 18 , 20 , 21 , 24 , 25 } ;
static final int [ ] nonMVOffsets = new int [ ] { 0 , 3 , 4 , 7 , 9 , 10 , 12 , 13 , 14 , 16 , 17 , 19 , 22 , 23 , 26 } ;
2011-03-16 02:36:55 +08:00
public double getMinGenotypeQuality ( ) {
return minGenotypeQuality ;
}
2011-11-29 00:13:14 +08:00
/ * *
* Constructor
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
2011-03-15 00:43:37 +08:00
*
* /
2011-11-29 00:13:14 +08:00
public MendelianViolation ( double minGenotypeQualityP ) {
this ( minGenotypeQualityP , true ) ;
2011-03-15 00:43:37 +08:00
}
/ * *
2011-11-29 00:13:14 +08:00
* Constructor
2011-03-15 00:43:37 +08:00
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
2011-11-29 00:13:14 +08:00
* @param abortOnSampleNotFound - Whether to stop execution if a family is passed but no relevant genotypes are found . If false , then the family is ignored .
2011-03-15 00:43:37 +08:00
* /
2011-11-29 00:13:14 +08:00
public MendelianViolation ( double minGenotypeQualityP , boolean abortOnSampleNotFound ) {
2011-03-15 00:43:37 +08:00
minGenotypeQuality = minGenotypeQualityP ;
2011-11-29 00:13:14 +08:00
this . abortOnSampleNotFound = abortOnSampleNotFound ;
violationFamilies = new ArrayList < String > ( ) ;
createInheritanceMap ( ) ;
2011-03-15 00:43:37 +08:00
}
/ * *
2011-11-29 00:13:14 +08:00
* Constructor
2011-03-15 00:43:37 +08:00
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
2011-11-29 00:13:14 +08:00
* @param abortOnSampleNotFound - Whether to stop execution if a family is passed but no relevant genotypes are found . If false , then the family is ignored .
* @param completeTriosOnly - whether only complete trios are considered or parent / child pairs are too .
2011-03-15 00:43:37 +08:00
* /
2011-11-29 00:13:14 +08:00
public MendelianViolation ( double minGenotypeQualityP , boolean abortOnSampleNotFound , boolean completeTriosOnly ) {
2011-03-15 00:43:37 +08:00
minGenotypeQuality = minGenotypeQualityP ;
2011-11-29 00:13:14 +08:00
this . abortOnSampleNotFound = abortOnSampleNotFound ;
violationFamilies = new ArrayList < String > ( ) ;
createInheritanceMap ( ) ;
allCalledOnly = completeTriosOnly ;
2011-03-15 00:43:37 +08:00
}
/ * *
2011-11-29 00:13:14 +08:00
* @param families the families to be checked for Mendelian violations
* @param vc the variant context to extract the genotypes and alleles for mom , dad and child .
* @return whether or not there is a mendelian violation at the site .
2011-03-15 00:43:37 +08:00
* /
2011-11-29 00:13:14 +08:00
public int countViolations ( Map < String , Set < Sample > > families , VariantContext vc ) {
2011-03-15 00:43:37 +08:00
2011-11-29 00:13:14 +08:00
//Reset counts
nocall = 0 ;
lowQual = 0 ;
familyCalled = 0 ;
varFamilyCalled = 0 ;
violations_total = 0 ;
violationFamilies . clear ( ) ;
clearInheritanceMap ( ) ;
2011-03-15 00:43:37 +08:00
2011-11-29 00:13:14 +08:00
for ( Set < Sample > family : families . values ( ) ) {
Iterator < Sample > sampleIterator = family . iterator ( ) ;
Sample sample ;
while ( sampleIterator . hasNext ( ) ) {
sample = sampleIterator . next ( ) ;
if ( sample . getParents ( ) . size ( ) > 0 )
updateViolations ( sample . getFamilyID ( ) , sample . getMaternalID ( ) , sample . getPaternalID ( ) , sample . getID ( ) , vc ) ;
}
2011-03-15 00:43:37 +08:00
}
2011-11-29 00:13:14 +08:00
return violations_total ;
}
2011-03-15 00:43:37 +08:00
2011-11-29 00:13:14 +08:00
public boolean isViolation ( Sample mother , Sample father , Sample child , VariantContext vc ) {
//Reset counts
nocall = 0 ;
lowQual = 0 ;
familyCalled = 0 ;
varFamilyCalled = 0 ;
violations_total = 0 ;
violationFamilies . clear ( ) ;
clearInheritanceMap ( ) ;
updateViolations ( mother . getFamilyID ( ) , mother . getID ( ) , father . getID ( ) , child . getID ( ) , vc ) ;
return violations_total > 0 ;
2011-03-15 00:43:37 +08:00
}
2011-11-29 00:13:14 +08:00
private void updateViolations ( String familyId , String motherId , String fatherId , String childId , VariantContext vc ) {
int count ;
Genotype gMom = vc . getGenotype ( motherId ) ;
Genotype gDad = vc . getGenotype ( fatherId ) ;
Genotype gChild = vc . getGenotype ( childId ) ;
if ( gMom = = null | | gDad = = null | | gChild = = null ) {
if ( abortOnSampleNotFound )
throw new IllegalArgumentException ( String . format ( "Variant %s:%d: Missing genotypes for family %s: mom=%s dad=%s family=%s" , vc . getChr ( ) , vc . getStart ( ) , familyId , motherId , fatherId , childId ) ) ;
else
return ;
}
//Count No calls
if ( allCalledOnly & & ( ! gMom . isCalled ( ) | | ! gDad . isCalled ( ) | | ! gChild . isCalled ( ) ) ) {
nocall + + ;
}
else if ( ! gMom . isCalled ( ) & & ! gDad . isCalled ( ) | | ! gChild . isCalled ( ) ) {
nocall + + ;
}
//Count lowQual. Note that if min quality is set to 0, even values with no quality associated are returned
else if ( minGenotypeQuality > 0 & & ( gMom . getPhredScaledQual ( ) < minGenotypeQuality | |
gDad . getPhredScaledQual ( ) < minGenotypeQuality | |
gChild . getPhredScaledQual ( ) < minGenotypeQuality ) ) {
lowQual + + ;
}
else {
//Count all families per loci called
familyCalled + + ;
//If the family is all homref, not too interesting
if ( ! ( gMom . isHomRef ( ) & & gDad . isHomRef ( ) & & gChild . isHomRef ( ) ) )
{
varFamilyCalled + + ;
if ( isViolation ( gMom , gDad , gChild ) ) {
violationFamilies . add ( familyId ) ;
violations_total + + ;
}
}
count = inheritance . get ( gMom . getType ( ) ) . get ( gDad . getType ( ) ) . get ( gChild . getType ( ) ) ;
inheritance . get ( gMom . getType ( ) ) . get ( gDad . getType ( ) ) . put ( gChild . getType ( ) , count + 1 ) ;
}
2011-03-15 00:43:37 +08:00
}
2011-11-29 00:13:14 +08:00
private boolean isViolation ( Genotype gMom , Genotype gDad , Genotype gChild ) {
//1 parent is no "call
if ( ! gMom . isCalled ( ) ) {
return ( gDad . isHomRef ( ) & & gChild . isHomVar ( ) ) | | ( gDad . isHomVar ( ) & & gChild . isHomRef ( ) ) ;
}
else if ( ! gDad . isCalled ( ) ) {
return ( gMom . isHomRef ( ) & & gChild . isHomVar ( ) ) | | ( gMom . isHomVar ( ) & & gChild . isHomRef ( ) ) ;
}
//Both parents have genotype information
return ! ( gMom . getAlleles ( ) . contains ( gChild . getAlleles ( ) . get ( 0 ) ) & & gDad . getAlleles ( ) . contains ( gChild . getAlleles ( ) . get ( 1 ) ) | |
gMom . getAlleles ( ) . contains ( gChild . getAlleles ( ) . get ( 1 ) ) & & gDad . getAlleles ( ) . contains ( gChild . getAlleles ( ) . get ( 0 ) ) ) ;
}
private void createInheritanceMap ( ) {
inheritance = new EnumMap < Genotype . Type , EnumMap < Genotype . Type , EnumMap < Genotype . Type , Integer > > > ( Genotype . Type . class ) ;
for ( Genotype . Type mType : Genotype . Type . values ( ) ) {
inheritance . put ( mType , new EnumMap < Genotype . Type , EnumMap < Genotype . Type , Integer > > ( Genotype . Type . class ) ) ;
for ( Genotype . Type dType : Genotype . Type . values ( ) ) {
inheritance . get ( mType ) . put ( dType , new EnumMap < Genotype . Type , Integer > ( Genotype . Type . class ) ) ;
for ( Genotype . Type cType : Genotype . Type . values ( ) ) {
inheritance . get ( mType ) . get ( dType ) . put ( cType , 0 ) ;
}
}
}
}
private void clearInheritanceMap ( ) {
for ( Genotype . Type mType : Genotype . Type . values ( ) ) {
for ( Genotype . Type dType : Genotype . Type . values ( ) ) {
for ( Genotype . Type cType : Genotype . Type . values ( ) ) {
inheritance . get ( mType ) . get ( dType ) . put ( cType , 0 ) ;
}
}
}
2011-03-15 00:43:37 +08:00
}
2011-10-20 05:42:37 +08:00
/ * *
* @return the likelihood ratio for a mendelian violation
* /
2011-11-29 00:13:14 +08:00
public double violationLikelihoodRatio ( VariantContext vc , String motherId , String fatherId , String childId ) {
2011-10-20 05:42:37 +08:00
double [ ] logLikAssignments = new double [ 27 ] ;
// the matrix to set up is
// MOM DAD CHILD
// |- AA
// AA AA | AB
// |- BB
// |- AA
// AA AB | AB
// |- BB
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
2011-11-29 00:13:14 +08:00
double [ ] momGL = vc . getGenotype ( motherId ) . getLikelihoods ( ) . getAsVector ( ) ;
double [ ] dadGL = vc . getGenotype ( fatherId ) . getLikelihoods ( ) . getAsVector ( ) ;
double [ ] childGL = vc . getGenotype ( childId ) . getLikelihoods ( ) . getAsVector ( ) ;
2011-10-20 05:42:37 +08:00
int offset = 0 ;
for ( int oMom = 0 ; oMom < 3 ; oMom + + ) {
for ( int oDad = 0 ; oDad < 3 ; oDad + + ) {
for ( int oChild = 0 ; oChild < 3 ; oChild + + ) {
logLikAssignments [ offset + + ] = momGL [ oMom ] + dadGL [ oDad ] + childGL [ oChild ] ;
}
}
}
double [ ] mvLiks = new double [ 12 ] ;
double [ ] nonMVLiks = new double [ 15 ] ;
for ( int i = 0 ; i < 12 ; i + + ) {
mvLiks [ i ] = logLikAssignments [ mvOffsets [ i ] ] ;
}
for ( int i = 0 ; i < 15 ; i + + ) {
nonMVLiks [ i ] = logLikAssignments [ nonMVOffsets [ i ] ] ;
}
return MathUtils . log10sumLog10 ( mvLiks ) - MathUtils . log10sumLog10 ( nonMVLiks ) ;
}
2011-03-15 00:43:37 +08:00
}