2011-03-15 00:43:37 +08:00
package org.broadinstitute.sting.utils ;
2011-09-29 21:57:15 +08:00
import org.broadinstitute.sting.gatk.samples.Sample ;
2011-03-15 00:43:37 +08:00
import org.broadinstitute.sting.utils.exceptions.UserException ;
2011-07-18 08:29:58 +08:00
import org.broadinstitute.sting.utils.variantcontext.Genotype ;
import org.broadinstitute.sting.utils.variantcontext.VariantContext ;
2011-03-15 00:43:37 +08:00
2011-07-18 08:29:58 +08:00
import java.util.Collection ;
import java.util.List ;
2011-03-15 00:43:37 +08:00
import java.util.regex.Matcher ;
import java.util.regex.Pattern ;
/ * *
* User : carneiro
* Date : 3 / 9 / 11
* Time : 12 : 38 PM
* /
public class MendelianViolation {
String sampleMom ;
String sampleDad ;
String sampleChild ;
List allelesMom ;
List allelesDad ;
List allelesChild ;
double minGenotypeQuality ;
private static Pattern FAMILY_PATTERN = Pattern . compile ( "(.*)\\+(.*)=(.*)" ) ;
2011-03-16 02:36:55 +08:00
public String getSampleMom ( ) {
return sampleMom ;
}
public String getSampleDad ( ) {
return sampleDad ;
}
public String getSampleChild ( ) {
return sampleChild ;
}
public double getMinGenotypeQuality ( ) {
return minGenotypeQuality ;
}
2011-03-15 00:43:37 +08:00
/ * *
*
* @param sampleMomP - sample name of mom
* @param sampleDadP - sample name of dad
* @param sampleChildP - sample name of child
* /
public MendelianViolation ( String sampleMomP , String sampleDadP , String sampleChildP ) {
sampleMom = sampleMomP ;
sampleDad = sampleDadP ;
sampleChild = sampleChildP ;
}
/ * *
*
* @param family - the sample names string "mom+dad=child"
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
* /
public MendelianViolation ( String family , double minGenotypeQualityP ) {
minGenotypeQuality = minGenotypeQualityP ;
Matcher m = FAMILY_PATTERN . matcher ( family ) ;
if ( m . matches ( ) ) {
sampleMom = m . group ( 1 ) ;
sampleDad = m . group ( 2 ) ;
sampleChild = m . group ( 3 ) ;
}
else
throw new IllegalArgumentException ( "Malformatted family structure string: " + family + " required format is mom+dad=child" ) ;
}
/ * *
* An alternative to the more general constructor if you want to get the Sample information from the engine yourself .
* @param sample - the sample object extracted from the sample metadata YAML file given to the engine .
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
* /
public MendelianViolation ( Sample sample , double minGenotypeQualityP ) {
2011-09-29 22:34:51 +08:00
sampleMom = sample . getMother ( ) . getID ( ) ;
sampleDad = sample . getFather ( ) . getID ( ) ;
sampleChild = sample . getID ( ) ;
2011-03-15 00:43:37 +08:00
minGenotypeQuality = minGenotypeQualityP ;
}
/ * *
2011-03-16 02:36:55 +08:00
* This method prepares the object to evaluate for violation . Typically you won ' t call it directly , a call to
* isViolation ( vc ) will take care of this . But if you want to know whether your site was a valid comparison site
* before evaluating it for mendelian violation , you can call setAlleles and then isViolation ( ) .
2011-03-15 00:43:37 +08:00
* @param vc - the variant context to extract the genotypes and alleles for mom , dad and child .
* @return false if couldn ' t find the genotypes or context has empty alleles . True otherwise .
* /
2011-03-16 02:36:55 +08:00
public boolean setAlleles ( VariantContext vc )
2011-03-15 00:43:37 +08:00
{
Genotype gMom = vc . getGenotypes ( sampleMom ) . get ( sampleMom ) ;
Genotype gDad = vc . getGenotypes ( sampleDad ) . get ( sampleDad ) ;
Genotype gChild = vc . getGenotypes ( sampleChild ) . get ( sampleChild ) ;
if ( gMom = = null | | gDad = = null | | gChild = = null )
throw new IllegalArgumentException ( String . format ( "Variant %s:%d didn't contain genotypes for all family members: mom=%s dad=%s child=%s" , vc . getChr ( ) , vc . getStart ( ) , sampleMom , sampleDad , sampleChild ) ) ;
if ( gMom . isNoCall ( ) | | gDad . isNoCall ( ) | | gChild . isNoCall ( ) | |
gMom . getPhredScaledQual ( ) < minGenotypeQuality | |
gDad . getPhredScaledQual ( ) < minGenotypeQuality | |
gChild . getPhredScaledQual ( ) < minGenotypeQuality ) {
return false ;
}
allelesMom = gMom . getAlleles ( ) ;
allelesDad = gDad . getAlleles ( ) ;
allelesChild = gChild . getAlleles ( ) ;
return ! allelesMom . isEmpty ( ) & & ! allelesDad . isEmpty ( ) & & ! allelesChild . isEmpty ( ) ;
}
/ * *
*
* @param vc the variant context to extract the genotypes and alleles for mom , dad and child .
* @return False if we can ' t determine ( lack of information ) , or it ' s not a violation . True if it is a violation .
*
* /
2011-09-30 00:20:07 +08:00
public boolean isViolation ( VariantContext vc )
2011-03-15 00:43:37 +08:00
{
return setAlleles ( vc ) & & isViolation ( ) ;
}
/ * *
* @return whether or not there is a mendelian violation at the site .
* /
2011-03-16 02:36:55 +08:00
public boolean isViolation ( ) {
2011-03-15 00:43:37 +08:00
if ( allelesMom . contains ( allelesChild . get ( 0 ) ) & & allelesDad . contains ( allelesChild . get ( 1 ) ) | |
allelesMom . contains ( allelesChild . get ( 1 ) ) & & allelesDad . contains ( allelesChild . get ( 0 ) ) )
return false ;
return true ;
}
}