Adapted MendelianViolation to the new ped family representation. Adapted all classes using MendelianViolation too.
Added a number of useful metrics on allele transmission and Mendelian violations (MVs) to MendelianViolationEvaluator.
This commit is contained in:
parent
e877db8f42
commit
795c99d693
|
|
@ -3,22 +3,18 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.samples.SampleDB;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFilterHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -30,23 +26,26 @@ import java.util.Map;
|
|||
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
private MendelianViolation mendelianViolation = null;
|
||||
private String motherId;
|
||||
private String fatherId;
|
||||
private String childId;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( mendelianViolation == null ) {
|
||||
if ( walker instanceof VariantAnnotator && ((VariantAnnotator) walker).familyStr != null) {
|
||||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).familyStr, ((VariantAnnotator)walker).minGenotypeQualityP );
|
||||
if (checkAndSetSamples(((VariantAnnotator) walker).getSampleDB())) {
|
||||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
||||
}
|
||||
else {
|
||||
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid Family String file (-family) on the command line.");
|
||||
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line containing only 1 trio.");
|
||||
}
|
||||
}
|
||||
|
||||
Map<String,Object> toRet = new HashMap<String,Object>(1);
|
||||
boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) && vc.getGenotype(mendelianViolation.getSampleChild()).hasLikelihoods() &&
|
||||
vc.hasGenotype(mendelianViolation.getSampleDad()) && vc.getGenotype(mendelianViolation.getSampleDad()).hasLikelihoods() &&
|
||||
vc.hasGenotype(mendelianViolation.getSampleMom()) && vc.getGenotype(mendelianViolation.getSampleMom()).hasLikelihoods();
|
||||
boolean hasAppropriateGenotypes = vc.hasGenotype(motherId) && vc.getGenotype(motherId).hasLikelihoods() &&
|
||||
vc.hasGenotype(fatherId) && vc.getGenotype(fatherId).hasLikelihoods() &&
|
||||
vc.hasGenotype(childId) && vc.getGenotype(childId).hasLikelihoods();
|
||||
if ( hasAppropriateGenotypes )
|
||||
toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc));
|
||||
toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc,motherId,fatherId,childId));
|
||||
|
||||
return toRet;
|
||||
}
|
||||
|
|
@ -55,4 +54,27 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
|
|||
public List<String> getKeyNames() { return Arrays.asList("MVLR"); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
|
||||
|
||||
private boolean checkAndSetSamples(SampleDB db){
|
||||
Set<String> families = db.getFamilyIDs();
|
||||
if(families.size() != 1)
|
||||
return false;
|
||||
|
||||
Set<Sample> family = db.getFamily(families.iterator().next());
|
||||
if(family.size() != 3)
|
||||
return false;
|
||||
|
||||
Iterator<Sample> sampleIter = family.iterator();
|
||||
Sample sample;
|
||||
for(sample = sampleIter.next();sampleIter.hasNext();sample=sampleIter.next()){
|
||||
if(sample.getParents().size()==2){
|
||||
motherId = sample.getMaternalID();
|
||||
fatherId = sample.getPaternalID();
|
||||
childId = sample.getID();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -7,9 +9,11 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Mendelian violation detection and counting
|
||||
|
|
@ -40,12 +44,25 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
@Analysis(name = "Mendelian Violation Evaluator", description = "Mendelian Violation Evaluator")
|
||||
public class MendelianViolationEvaluator extends VariantEvaluator {
|
||||
|
||||
@DataPoint(description = "Number of mendelian variants found")
|
||||
@DataPoint(description = "Number of variants found with at least one family having genotypes")
|
||||
long nVariants;
|
||||
@DataPoint(description = "Number of variants found with no family having genotypes -- these sites do not count in the nNoCall")
|
||||
long nSkipped;
|
||||
@DataPoint(description="Number of variants x families called (no missing genotype or lowqual)")
|
||||
long nFamCalled;
|
||||
@DataPoint(description="Number of variants x families called (no missing genotype or lowqual) that contain at least one var allele.")
|
||||
long nVarFamCalled;
|
||||
@DataPoint(description="Number of variants x families discarded as low quality")
|
||||
long nLowQual;
|
||||
@DataPoint(description="Number of variants x families discarded as no call")
|
||||
long nNoCall;
|
||||
@DataPoint(description="Number of loci with mendelian violations")
|
||||
long nLociViolations;
|
||||
@DataPoint(description = "Number of mendelian violations found")
|
||||
long nViolations;
|
||||
|
||||
@DataPoint(description = "number of child hom ref calls where the parent was hom variant")
|
||||
|
||||
/*@DataPoint(description = "number of child hom ref calls where the parent was hom variant")
|
||||
long KidHomRef_ParentHomVar;
|
||||
@DataPoint(description = "number of child het calls where the parent was hom ref")
|
||||
long KidHet_ParentsHomRef;
|
||||
|
|
@ -53,11 +70,65 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
long KidHet_ParentsHomVar;
|
||||
@DataPoint(description = "number of child hom variant calls where the parent was hom ref")
|
||||
long KidHomVar_ParentHomRef;
|
||||
*/
|
||||
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HOM_VAR")
|
||||
long mvRefRef_Var;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HET")
|
||||
long mvRefRef_Het;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HET -> HOM_VAR")
|
||||
long mvRefHet_Var;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_VAR")
|
||||
long mvRefVar_Var;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_REF")
|
||||
long mvRefVar_Ref;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HET -> HOM_REF")
|
||||
long mvVarHet_Ref;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HOM_REF")
|
||||
long mvVarVar_Ref;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HET")
|
||||
long mvVarVar_Het;
|
||||
|
||||
|
||||
/*@DataPoint(description ="Number of inherited var alleles from het parents")
|
||||
long nInheritedVar;
|
||||
@DataPoint(description ="Number of inherited ref alleles from het parents")
|
||||
long nInheritedRef;*/
|
||||
|
||||
@DataPoint(description="Number of HomRef/HomRef/HomRef trios")
|
||||
long HomRefHomRef_HomRef;
|
||||
@DataPoint(description="Number of Het/Het/Het trios")
|
||||
long HetHet_Het;
|
||||
@DataPoint(description="Number of Het/Het/HomRef trios")
|
||||
long HetHet_HomRef;
|
||||
@DataPoint(description="Number of Het/Het/HomVar trios")
|
||||
long HetHet_HomVar;
|
||||
@DataPoint(description="Number of HomVar/HomVar/HomVar trios")
|
||||
long HomVarHomVar_HomVar;
|
||||
@DataPoint(description="Number of HomRef/HomVar/Het trios")
|
||||
long HomRefHomVAR_Het;
|
||||
@DataPoint(description="Number of ref alleles inherited from het/het parents")
|
||||
long HetHet_inheritedRef;
|
||||
@DataPoint(description="Number of var alleles inherited from het/het parents")
|
||||
long HetHet_inheritedVar;
|
||||
@DataPoint(description="Number of ref alleles inherited from homRef/het parents")
|
||||
long HomRefHet_inheritedRef;
|
||||
@DataPoint(description="Number of var alleles inherited from homRef/het parents")
|
||||
long HomRefHet_inheritedVar;
|
||||
@DataPoint(description="Number of ref alleles inherited from homVar/het parents")
|
||||
long HomVarHet_inheritedRef;
|
||||
@DataPoint(description="Number of var alleles inherited from homVar/het parents")
|
||||
long HomVarHet_inheritedVar;
|
||||
|
||||
MendelianViolation mv;
|
||||
PrintStream mvFile;
|
||||
Map<String,Set<Sample>> families;
|
||||
|
||||
public void initialize(VariantEvalWalker walker) {
|
||||
mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold());
|
||||
//Changed by Laurent Francioli - 2011-06-07
|
||||
//mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold());
|
||||
mv = new MendelianViolation(walker.getMendelianViolationQualThreshold(),false);
|
||||
families = walker.getSampleDB().getFamilies();
|
||||
}
|
||||
|
||||
public boolean enabled() {
|
||||
|
|
@ -75,110 +146,48 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
|
||||
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci
|
||||
if (mv.setAlleles(vc)) {
|
||||
|
||||
if(mv.countViolations(families,vc)>0){
|
||||
nLociViolations++;
|
||||
nViolations += mv.getViolationsCount();
|
||||
mvRefRef_Var += mv.getParentsRefRefChildVar();
|
||||
mvRefRef_Het += mv.getParentsRefRefChildHet();
|
||||
mvRefHet_Var += mv.getParentsRefHetChildVar();
|
||||
mvRefVar_Var += mv.getParentsRefVarChildVar();
|
||||
mvRefVar_Ref += mv.getParentsRefVarChildRef();
|
||||
mvVarHet_Ref += mv.getParentsVarHetChildRef();
|
||||
mvVarVar_Ref += mv.getParentsVarVarChildRef();
|
||||
mvVarVar_Het += mv.getParentsVarVarChildHet();
|
||||
|
||||
}
|
||||
HomRefHomRef_HomRef += mv.getRefRefRef();
|
||||
HetHet_Het += mv.getHetHetHet();
|
||||
HetHet_HomRef += mv.getHetHetHomRef();
|
||||
HetHet_HomVar += mv.getHetHetHomVar();
|
||||
HomVarHomVar_HomVar += mv.getVarVarVar();
|
||||
HomRefHomVAR_Het += mv.getRefVarHet();
|
||||
HetHet_inheritedRef += mv.getParentsHetHetInheritedRef();
|
||||
HetHet_inheritedVar += mv.getParentsHetHetInheritedVar();
|
||||
HomRefHet_inheritedRef += mv.getParentsRefHetInheritedRef();
|
||||
HomRefHet_inheritedVar += mv.getParentsRefHetInheritedVar();
|
||||
HomVarHet_inheritedRef += mv.getParentsVarHetInheritedRef();
|
||||
HomVarHet_inheritedVar += mv.getParentsVarHetInheritedVar();
|
||||
|
||||
if(mv.getFamilyCalledCount()>0){
|
||||
nVariants++;
|
||||
|
||||
Genotype momG = vc.getGenotype(mv.getSampleMom());
|
||||
Genotype dadG = vc.getGenotype(mv.getSampleDad());
|
||||
Genotype childG = vc.getGenotype(mv.getSampleChild());
|
||||
|
||||
if (mv.isViolation()) {
|
||||
nViolations++;
|
||||
|
||||
String label;
|
||||
if (childG.isHomRef() && (momG.isHomVar() || dadG.isHomVar())) {
|
||||
label = "KidHomRef_ParentHomVar";
|
||||
KidHomRef_ParentHomVar++;
|
||||
} else if (childG.isHet() && (momG.isHomRef() && dadG.isHomRef())) {
|
||||
label = "KidHet_ParentsHomRef";
|
||||
KidHet_ParentsHomRef++;
|
||||
} else if (childG.isHet() && (momG.isHomVar() && dadG.isHomVar())) {
|
||||
label = "KidHet_ParentsHomVar";
|
||||
KidHet_ParentsHomVar++;
|
||||
} else if (childG.isHomVar() && (momG.isHomRef() || dadG.isHomRef())) {
|
||||
label = "KidHomVar_ParentHomRef";
|
||||
KidHomVar_ParentHomRef++;
|
||||
} else {
|
||||
throw new ReviewedStingException("BUG: unexpected child genotype class " + childG);
|
||||
}
|
||||
|
||||
return "MendelViolation=" + label;
|
||||
}
|
||||
nFamCalled += mv.getFamilyCalledCount();
|
||||
nLowQual += mv.getFamilyLowQualsCount();
|
||||
nNoCall += mv.getFamilyNoCallCount();
|
||||
nVarFamCalled += mv.getVarFamilyCalledCount();
|
||||
}
|
||||
}
|
||||
|
||||
return null; // we don't capture any intersting sites
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
private double getQThreshold() {
|
||||
//return getVEWalker().MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred
|
||||
return mendelianViolationQualThreshold / 10; // we aren't 10x scaled in the GATK a la phred
|
||||
//return 0.0;
|
||||
}
|
||||
|
||||
TrioStructure trio;
|
||||
double mendelianViolationQualThreshold;
|
||||
|
||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||
|
||||
public static class TrioStructure {
|
||||
public String mom, dad, child;
|
||||
}
|
||||
|
||||
public static TrioStructure parseTrioDescription(String family) {
|
||||
Matcher m = FAMILY_PATTERN.matcher(family);
|
||||
if (m.matches()) {
|
||||
TrioStructure trio = new TrioStructure();
|
||||
//System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE);
|
||||
trio.mom = m.group(1);
|
||||
trio.dad = m.group(2);
|
||||
trio.child = m.group(3);
|
||||
return trio;
|
||||
} else {
|
||||
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
|
||||
}
|
||||
}
|
||||
|
||||
public void initialize(VariantEvalWalker walker) {
|
||||
trio = parseTrioDescription(walker.getFamilyStructure());
|
||||
mendelianViolationQualThreshold = walker.getMendelianViolationQualThreshold();
|
||||
}
|
||||
|
||||
private boolean includeGenotype(Genotype g) {
|
||||
return g.getLog10PError() > getQThreshold() && g.isCalled();
|
||||
}
|
||||
|
||||
public static boolean isViolation(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG) {
|
||||
return isViolation(vc, momG.getAlleles(), dadG.getAlleles(), childG.getAlleles());
|
||||
}
|
||||
|
||||
public static boolean isViolation(VariantContext vc, TrioStructure trio ) {
|
||||
return isViolation(vc, vc.getGenotype(trio.mom), vc.getGenotype(trio.dad), vc.getGenotype(trio.child) );
|
||||
}
|
||||
|
||||
public static boolean isViolation(VariantContext vc, List<Allele> momA, List<Allele> dadA, List<Allele> childA) {
|
||||
//VariantContext momVC = vc.subContextFromGenotypes(momG);
|
||||
//VariantContext dadVC = vc.subContextFromGenotypes(dadG);
|
||||
int i = 0;
|
||||
Genotype childG = new Genotype("kidG", childA);
|
||||
for (Allele momAllele : momA) {
|
||||
for (Allele dadAllele : dadA) {
|
||||
if (momAllele.isCalled() && dadAllele.isCalled()) {
|
||||
Genotype possibleChild = new Genotype("possibleGenotype" + i, Arrays.asList(momAllele, dadAllele));
|
||||
if (childG.sameGenotype(possibleChild)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else{
|
||||
nSkipped++;
|
||||
}
|
||||
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
return true;
|
||||
return null; // we don't capture any interesting sites
|
||||
}
|
||||
|
||||
|
||||
*/
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,9 +26,9 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
|
|||
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
|
@ -41,7 +41,6 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
|||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -282,6 +281,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="select_random_fraction", shortName="fraction", doc="Selects a fraction (a number between 0 and 1) of the total variants at random from the variant track", required=false)
|
||||
private double fractionRandom = 0;
|
||||
|
||||
@Argument(fullName="remove_fraction_genotypes", shortName="fractionGenotypes", doc="Selects a fraction (a number between 0 and 1) of the total genotypes at random from the variant track and sets them to nocall", required=false)
|
||||
private double fractionGenotypes = 0;
|
||||
|
||||
/**
|
||||
* This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria.
|
||||
* When specified one or more times, a particular type of variant is selected.
|
||||
|
|
@ -325,7 +327,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
private boolean DISCORDANCE_ONLY = false;
|
||||
private boolean CONCORDANCE_ONLY = false;
|
||||
|
||||
private Set<MendelianViolation> mvSet = new HashSet<MendelianViolation>();
|
||||
private MendelianViolation mv;
|
||||
|
||||
|
||||
/* variables used by the SELECT RANDOM modules */
|
||||
|
|
@ -344,6 +346,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
private PrintStream outMVFileStream = null;
|
||||
|
||||
//Random number generator for the genotypes to remove
|
||||
private Random randomGenotypes = new Random();
|
||||
|
||||
/**
|
||||
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
||||
|
|
@ -380,8 +384,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
for ( String sample : samples )
|
||||
logger.info("Including sample '" + sample + "'");
|
||||
|
||||
|
||||
|
||||
// if user specified types to include, add these, otherwise, add all possible variant context types to list of vc types to include
|
||||
if (TYPES_TO_INCLUDE.isEmpty()) {
|
||||
|
||||
|
|
@ -421,29 +423,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getName());
|
||||
|
||||
if (MENDELIAN_VIOLATIONS) {
|
||||
if ( FAMILY_STRUCTURE_FILE != null) {
|
||||
try {
|
||||
for ( final String line : new XReadLines( FAMILY_STRUCTURE_FILE ) ) {
|
||||
MendelianViolation mv = new MendelianViolation(line, MENDELIAN_VIOLATION_QUAL_THRESHOLD);
|
||||
if (samples.contains(mv.getSampleChild()) && samples.contains(mv.getSampleDad()) && samples.contains(mv.getSampleMom()))
|
||||
mvSet.add(mv);
|
||||
}
|
||||
} catch ( FileNotFoundException e ) {
|
||||
throw new UserException.CouldNotReadInputFile(FAMILY_STRUCTURE_FILE, e);
|
||||
}
|
||||
if (outMVFile != null)
|
||||
try {
|
||||
outMVFileStream = new PrintStream(outMVFile);
|
||||
}
|
||||
catch (FileNotFoundException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); }
|
||||
}
|
||||
else
|
||||
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
||||
}
|
||||
else if (!FAMILY_STRUCTURE.isEmpty()) {
|
||||
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
||||
MENDELIAN_VIOLATIONS = true;
|
||||
mv = new MendelianViolation(MENDELIAN_VIOLATION_QUAL_THRESHOLD,false,true);
|
||||
}
|
||||
|
||||
SELECT_RANDOM_NUMBER = numRandom > 0;
|
||||
|
|
@ -479,26 +459,26 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
for (VariantContext vc : vcs) {
|
||||
if (MENDELIAN_VIOLATIONS) {
|
||||
boolean foundMV = false;
|
||||
for (MendelianViolation mv : mvSet) {
|
||||
if (mv.isViolation(vc)) {
|
||||
foundMV = true;
|
||||
//System.out.println(vc.toString());
|
||||
if (outMVFile != null)
|
||||
outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
|
||||
if (MENDELIAN_VIOLATIONS && mv.countViolations(this.getSampleDB().getFamilies(samples),vc) < 1)
|
||||
break;
|
||||
|
||||
if (outMVFile != null){
|
||||
for( String familyId : mv.getViolationFamilies()){
|
||||
for(Sample sample : this.getSampleDB().getFamily(familyId)){
|
||||
if(sample.getParents().size() > 0){
|
||||
outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
|
||||
"childG=%s childGL=%s\n",vc.getChr(), vc.getStart(),
|
||||
vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)),
|
||||
mv.getSampleMom(), mv.getSampleDad(), mv.getSampleChild(),
|
||||
vc.getGenotype(mv.getSampleMom()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
|
||||
vc.getGenotype(mv.getSampleDad()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
|
||||
vc.getGenotype(mv.getSampleChild()).toBriefString(),vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString() );
|
||||
sample.getMaternalID(), sample.getPaternalID(), sample.getID(),
|
||||
vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(),
|
||||
vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(),
|
||||
vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString() );
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!foundMV)
|
||||
break;
|
||||
}
|
||||
|
||||
if (DISCORDANCE_ONLY) {
|
||||
Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation());
|
||||
if (!isDiscordant(vc, compVCs))
|
||||
|
|
@ -657,9 +637,31 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
final VariantContext sub = vc.subContextFromSamples(samples, vc.getAlleles());
|
||||
VariantContextBuilder builder = new VariantContextBuilder(sub);
|
||||
|
||||
GenotypesContext newGC = sub.getGenotypes();
|
||||
|
||||
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate)
|
||||
if ( vc.getAlleles().size() != sub.getAlleles().size() )
|
||||
builder.genotypes(VariantContextUtils.stripPLs(vc.getGenotypes()));
|
||||
newGC = VariantContextUtils.stripPLs(sub.getGenotypes());
|
||||
|
||||
//Remove a fraction of the genotypes if needed
|
||||
if(fractionGenotypes>0){
|
||||
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
for ( Genotype genotype : newGC ) {
|
||||
//Set genotype to no call if it falls in the fraction.
|
||||
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
|
||||
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
|
||||
alleles.add(Allele.create((byte)'.'));
|
||||
alleles.add(Allele.create((byte)'.'));
|
||||
genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap<String, Object>(),false));
|
||||
}
|
||||
else{
|
||||
genotypes.add(genotype);
|
||||
}
|
||||
}
|
||||
newGC = GenotypesContext.create(genotypes);
|
||||
}
|
||||
|
||||
builder.genotypes(newGC);
|
||||
|
||||
int depth = 0;
|
||||
for (String sample : sub.getSampleNames()) {
|
||||
|
|
|
|||
|
|
@ -1,147 +1,399 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* User: carneiro
|
||||
* User: carneiro / lfran
|
||||
* Date: 3/9/11
|
||||
* Time: 12:38 PM
|
||||
*
|
||||
* Class for the identification and tracking of mendelian violation. It can be used in 2 distinct ways:
|
||||
* - Either using an instance of the MendelianViolation class to track mendelian violations for each of the families while
|
||||
* walking over the variants
|
||||
* - Or using the static methods to directly get information about mendelian violation in a family at a given locus
|
||||
*
|
||||
*/
|
||||
public class MendelianViolation {
|
||||
String sampleMom;
|
||||
String sampleDad;
|
||||
String sampleChild;
|
||||
//List of families with violations
|
||||
private List<String> violationFamilies;
|
||||
|
||||
List allelesMom;
|
||||
List allelesDad;
|
||||
List allelesChild;
|
||||
//Call information
|
||||
private int nocall = 0;
|
||||
private int familyCalled = 0;
|
||||
private int varFamilyCalled = 0;
|
||||
private int lowQual = 0;
|
||||
|
||||
double minGenotypeQuality;
|
||||
private boolean allCalledOnly = true;
|
||||
|
||||
//Stores occurrences of inheritance
|
||||
private EnumMap<Genotype.Type, EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>> inheritance;
|
||||
|
||||
private int violations_total=0;
|
||||
|
||||
private double minGenotypeQuality;
|
||||
|
||||
private boolean abortOnSampleNotFound;
|
||||
|
||||
//Number of families with genotype information for all members
|
||||
public int getFamilyCalledCount(){
|
||||
return familyCalled;
|
||||
}
|
||||
|
||||
//Number of families with genotype information for all members that contain at least one var allele
|
||||
public int getVarFamilyCalledCount(){
|
||||
return varFamilyCalled;
|
||||
}
|
||||
|
||||
//Number of families missing genotypes for one or more of their members
|
||||
public int getFamilyNoCallCount(){
|
||||
return nocall;
|
||||
}
|
||||
|
||||
//Number of families with genotypes below the set quality threshold
|
||||
public int getFamilyLowQualsCount(){
|
||||
return lowQual;
|
||||
}
|
||||
|
||||
public int getViolationsCount(){
|
||||
return violations_total;
|
||||
}
|
||||
|
||||
//Count of alt alleles inherited from het parents (no violation)
|
||||
public int getParentHetInheritedVar(){
|
||||
return getParentsHetHetInheritedVar() + getParentsRefHetInheritedVar() + getParentsVarHetInheritedVar();
|
||||
}
|
||||
|
||||
//Count of ref alleles inherited from het parents (no violation)
|
||||
public int getParentHetInheritedRef(){
|
||||
return getParentsHetHetInheritedRef() + getParentsRefHetInheritedRef() + getParentsVarHetInheritedRef();
|
||||
}
|
||||
|
||||
//Count of HomRef/HomRef/HomRef trios
|
||||
public int getRefRefRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of HomVar/HomVar/HomVar trios
|
||||
public int getVarVarVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of HomRef/HomVar/Het trios
|
||||
public int getRefVarHet(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET) +
|
||||
inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||
}
|
||||
|
||||
//Count of Het/Het/Het trios
|
||||
public int getHetHetHet(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET);
|
||||
}
|
||||
|
||||
//Count of Het/Het/HomRef trios
|
||||
public int getHetHetHomRef(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of Het/Het/HomVar trios
|
||||
public int getHetHetHomVar(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of ref alleles inherited from Het/Het parents (no violation)
|
||||
public int getParentsHetHetInheritedRef(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||
+ 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||
//return parentsHetHet_childRef;
|
||||
}
|
||||
|
||||
//Count of var alleles inherited from Het/Het parents (no violation)
|
||||
public int getParentsHetHetInheritedVar(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||
+ 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR);
|
||||
//return parentsHetHet_childVar;
|
||||
}
|
||||
|
||||
//Count of ref alleles inherited from HomRef/Het parents (no violation)
|
||||
public int getParentsRefHetInheritedRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||
//return parentsHomRefHet_childRef;
|
||||
}
|
||||
|
||||
//Count of var alleles inherited from HomRef/Het parents (no violation)
|
||||
public int getParentsRefHetInheritedVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||
//return parentsHomRefHet_childVar;
|
||||
}
|
||||
|
||||
//Count of ref alleles inherited from HomVar/Het parents (no violation)
|
||||
public int getParentsVarHetInheritedRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET);
|
||||
//return parentsHomVarHet_childRef;
|
||||
}
|
||||
|
||||
//Count of var alleles inherited from HomVar/Het parents (no violation)
|
||||
public int getParentsVarHetInheritedVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR);
|
||||
//return parentsHomVarHet_childVar;
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HOM_REF -> HOM_VAR
|
||||
public int getParentsRefRefChildVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HOM_REF -> HET
|
||||
public int getParentsRefRefChildHet(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HET -> HOM_VAR
|
||||
public int getParentsRefHetChildVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR)
|
||||
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_VAR
|
||||
public int getParentsRefVarChildVar(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR)
|
||||
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_REF
|
||||
public int getParentsRefVarChildRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF)
|
||||
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_VAR/HET -> HOM_REF
|
||||
public int getParentsVarHetChildRef(){
|
||||
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF)
|
||||
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_VAR/HOM_VAR -> HOM_REF
|
||||
public int getParentsVarVarChildRef(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF);
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_VAR/HOM_VAR -> HET
|
||||
public int getParentsVarVarChildHet(){
|
||||
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET);
|
||||
}
|
||||
|
||||
|
||||
//Count of violations of the type HOM_VAR/? -> HOM_REF
|
||||
public int getParentVarChildRef(){
|
||||
return getParentsRefVarChildRef() + getParentsVarHetChildRef() +getParentsVarVarChildRef();
|
||||
}
|
||||
|
||||
//Count of violations of the type HOM_REF/? -> HOM_VAR
|
||||
public int getParentRefChildVar(){
|
||||
return getParentsRefVarChildVar() + getParentsRefHetChildVar() +getParentsRefRefChildVar();
|
||||
}
|
||||
|
||||
//Returns a String containing all trios where a Mendelian violation was observed.
|
||||
//The String is formatted "mom1+dad1=child1,mom2+dad2=child2,..."
|
||||
public String getViolationFamiliesString(){
|
||||
if(violationFamilies.isEmpty())
|
||||
return "";
|
||||
|
||||
Iterator<String> it = violationFamilies.iterator();
|
||||
String violationFams = it.next();
|
||||
while(it.hasNext()){
|
||||
violationFams += ","+it.next();
|
||||
}
|
||||
return violationFams;
|
||||
}
|
||||
|
||||
public List<String> getViolationFamilies(){
|
||||
return violationFamilies;
|
||||
}
|
||||
|
||||
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
|
||||
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
|
||||
|
||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||
|
||||
public String getSampleMom() {
|
||||
return sampleMom;
|
||||
}
|
||||
public String getSampleDad() {
|
||||
return sampleDad;
|
||||
}
|
||||
public String getSampleChild() {
|
||||
return sampleChild;
|
||||
}
|
||||
public double getMinGenotypeQuality() {
|
||||
return minGenotypeQuality;
|
||||
}
|
||||
|
||||
/**
 * Creates a checker for one explicitly named trio.
 *
 * @param sampleMomP - sample name of mom
 * @param sampleDadP - sample name of dad
 * @param sampleChildP - sample name of child
 */
public MendelianViolation (String sampleMomP, String sampleDadP, String sampleChildP) {
    this.sampleMom = sampleMomP;
    this.sampleDad = sampleDadP;
    this.sampleChild = sampleChildP;
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param family - the sample names string "mom+dad=child"
|
||||
/**
|
||||
* Constructor
|
||||
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
|
||||
*/
|
||||
public MendelianViolation(String family, double minGenotypeQualityP) {
|
||||
minGenotypeQuality = minGenotypeQualityP;
|
||||
|
||||
Matcher m = FAMILY_PATTERN.matcher(family);
|
||||
if (m.matches()) {
|
||||
sampleMom = m.group(1);
|
||||
sampleDad = m.group(2);
|
||||
sampleChild = m.group(3);
|
||||
}
|
||||
else
|
||||
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
|
||||
}
|
||||
|
||||
/**
 * An alternative to the more general constructor if you want to get the Sample information from the engine yourself.
 * The mother and father ids are read from the sample's parents.
 *
 * @param sample - the sample object extracted from the sample metadata YAML file given to the engine.
 * @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to assess mendelian violation
 */
public MendelianViolation(Sample sample, double minGenotypeQualityP) {
    this.sampleMom = sample.getMother().getID();
    this.sampleDad = sample.getFather().getID();
    this.sampleChild = sample.getID();
    this.minGenotypeQuality = minGenotypeQualityP;
}
|
||||
|
||||
/**
|
||||
* This method prepares the object to evaluate for violation. Typically you won't call it directly, a call to
|
||||
* isViolation(vc) will take care of this. But if you want to know whether your site was a valid comparison site
|
||||
* before evaluating it for mendelian violation, you can call setAlleles and then isViolation().
|
||||
* @param vc - the variant context to extract the genotypes and alleles for mom, dad and child.
|
||||
* @return false if couldn't find the genotypes or context has empty alleles. True otherwise.
|
||||
*/
|
||||
public boolean setAlleles (VariantContext vc)
|
||||
{
|
||||
Genotype gMom = vc.getGenotypes(sampleMom).get(sampleMom);
|
||||
Genotype gDad = vc.getGenotypes(sampleDad).get(sampleDad);
|
||||
Genotype gChild = vc.getGenotypes(sampleChild).get(sampleChild);
|
||||
|
||||
if (gMom == null || gDad == null || gChild == null)
|
||||
throw new IllegalArgumentException(String.format("Variant %s:%d didn't contain genotypes for all family members: mom=%s dad=%s child=%s", vc.getChr(), vc.getStart(), sampleMom, sampleDad, sampleChild));
|
||||
|
||||
if (gMom.isNoCall() || gDad.isNoCall() || gChild.isNoCall() ||
|
||||
gMom.getPhredScaledQual() < minGenotypeQuality ||
|
||||
gDad.getPhredScaledQual() < minGenotypeQuality ||
|
||||
gChild.getPhredScaledQual() < minGenotypeQuality ) {
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
allelesMom = gMom.getAlleles();
|
||||
allelesDad = gDad.getAlleles();
|
||||
allelesChild = gChild.getAlleles();
|
||||
return !allelesMom.isEmpty() && !allelesDad.isEmpty() && !allelesChild.isEmpty();
|
||||
}
|
||||
|
||||
|
||||
/**
 * Constructor that aborts when a family's genotypes are missing
 * (delegates with abortOnSampleNotFound set to true).
 *
 * @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to assess mendelian violation
 */
public MendelianViolation(double minGenotypeQualityP) {
    this(minGenotypeQualityP, true);
}
|
||||
|
||||
/**
 * Constructor
 *
 * @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to assess mendelian violation
 * @param abortOnSampleNotFound - Whether to stop execution if a family is passed but no relevant genotypes are found. If false, then the family is ignored.
 */
public MendelianViolation(double minGenotypeQualityP, boolean abortOnSampleNotFound) {
    this.minGenotypeQuality = minGenotypeQualityP;
    this.abortOnSampleNotFound = abortOnSampleNotFound;
    this.violationFamilies = new ArrayList<String>();
    // Allocate the mother/father/child genotype counters, all starting at zero.
    createInheritanceMap();
}
|
||||
|
||||
/**
 * Constructor
 *
 * @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to assess mendelian violation
 * @param abortOnSampleNotFound - Whether to stop execution if a family is passed but no relevant genotypes are found. If false, then the family is ignored.
 * @param completeTriosOnly - whether only complete trios are considered or parent/child pairs are too.
 */
public MendelianViolation(double minGenotypeQualityP, boolean abortOnSampleNotFound, boolean completeTriosOnly) {
    this.minGenotypeQuality = minGenotypeQualityP;
    this.abortOnSampleNotFound = abortOnSampleNotFound;
    this.allCalledOnly = completeTriosOnly;
    this.violationFamilies = new ArrayList<String>();
    // Allocate the mother/father/child genotype counters, all starting at zero.
    createInheritanceMap();
}
|
||||
|
||||
/**
|
||||
* @param families the families to be checked for Mendelian violations
|
||||
* @param vc the variant context to extract the genotypes and alleles for mom, dad and child.
|
||||
* @return False if we can't determine (lack of information), or it's not a violation. True if it is a violation.
|
||||
*
|
||||
*/
|
||||
public boolean isViolation(VariantContext vc)
|
||||
{
|
||||
return setAlleles(vc) && isViolation();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return whether or not there is a mendelian violation at the site.
|
||||
*/
|
||||
public boolean isViolation() {
|
||||
if (allelesMom.contains(allelesChild.get(0)) && allelesDad.contains(allelesChild.get(1)) ||
|
||||
allelesMom.contains(allelesChild.get(1)) && allelesDad.contains(allelesChild.get(0)))
|
||||
return false;
|
||||
return true;
|
||||
public int countViolations(Map<String, Set<Sample>> families, VariantContext vc){
|
||||
|
||||
//Reset counts
|
||||
nocall = 0;
|
||||
lowQual = 0;
|
||||
familyCalled = 0;
|
||||
varFamilyCalled = 0;
|
||||
violations_total=0;
|
||||
violationFamilies.clear();
|
||||
clearInheritanceMap();
|
||||
|
||||
for(Set<Sample> family : families.values()){
|
||||
Iterator<Sample> sampleIterator = family.iterator();
|
||||
Sample sample;
|
||||
while(sampleIterator.hasNext()){
|
||||
sample = sampleIterator.next();
|
||||
if(sample.getParents().size() > 0)
|
||||
updateViolations(sample.getFamilyID(),sample.getMaternalID(), sample.getPaternalID(), sample.getID() ,vc);
|
||||
}
|
||||
}
|
||||
return violations_total;
|
||||
}
|
||||
|
||||
public boolean isViolation(Sample mother, Sample father, Sample child, VariantContext vc){
|
||||
|
||||
//Reset counts
|
||||
nocall = 0;
|
||||
lowQual = 0;
|
||||
familyCalled = 0;
|
||||
varFamilyCalled = 0;
|
||||
violations_total=0;
|
||||
violationFamilies.clear();
|
||||
clearInheritanceMap();
|
||||
updateViolations(mother.getFamilyID(),mother.getID(),father.getID(),child.getID(),vc);
|
||||
return violations_total>0;
|
||||
}
|
||||
|
||||
|
||||
private void updateViolations(String familyId, String motherId, String fatherId, String childId, VariantContext vc){
|
||||
|
||||
int count;
|
||||
Genotype gMom = vc.getGenotype(motherId);
|
||||
Genotype gDad = vc.getGenotype(fatherId);
|
||||
Genotype gChild = vc.getGenotype(childId);
|
||||
|
||||
if (gMom == null || gDad == null || gChild == null){
|
||||
if(abortOnSampleNotFound)
|
||||
throw new IllegalArgumentException(String.format("Variant %s:%d: Missing genotypes for family %s: mom=%s dad=%s family=%s", vc.getChr(), vc.getStart(), familyId, motherId, fatherId, childId));
|
||||
else
|
||||
return;
|
||||
}
|
||||
//Count No calls
|
||||
if(allCalledOnly && (!gMom.isCalled() || !gDad.isCalled() || !gChild.isCalled())){
|
||||
nocall++;
|
||||
}
|
||||
else if (!gMom.isCalled() && !gDad.isCalled() || !gChild.isCalled()){
|
||||
nocall++;
|
||||
}
|
||||
//Count lowQual. Note that if min quality is set to 0, even values with no quality associated are returned
|
||||
else if (minGenotypeQuality>0 && (gMom.getPhredScaledQual() < minGenotypeQuality ||
|
||||
gDad.getPhredScaledQual() < minGenotypeQuality ||
|
||||
gChild.getPhredScaledQual() < minGenotypeQuality )) {
|
||||
lowQual++;
|
||||
}
|
||||
else{
|
||||
//Count all families per loci called
|
||||
familyCalled++;
|
||||
//If the family is all homref, not too interesting
|
||||
if(!(gMom.isHomRef() && gDad.isHomRef() && gChild.isHomRef()))
|
||||
{
|
||||
varFamilyCalled++;
|
||||
if(isViolation(gMom, gDad, gChild)){
|
||||
violationFamilies.add(familyId);
|
||||
violations_total++;
|
||||
}
|
||||
}
|
||||
count = inheritance.get(gMom.getType()).get(gDad.getType()).get(gChild.getType());
|
||||
inheritance.get(gMom.getType()).get(gDad.getType()).put(gChild.getType(),count+1);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isViolation(Genotype gMom, Genotype gDad, Genotype gChild) {
|
||||
//1 parent is no "call
|
||||
if(!gMom.isCalled()){
|
||||
return (gDad.isHomRef() && gChild.isHomVar()) || (gDad.isHomVar() && gChild.isHomRef());
|
||||
}
|
||||
else if(!gDad.isCalled()){
|
||||
return (gMom.isHomRef() && gChild.isHomVar()) || (gMom.isHomVar() && gChild.isHomRef());
|
||||
}
|
||||
//Both parents have genotype information
|
||||
return !(gMom.getAlleles().contains(gChild.getAlleles().get(0)) && gDad.getAlleles().contains(gChild.getAlleles().get(1)) ||
|
||||
gMom.getAlleles().contains(gChild.getAlleles().get(1)) && gDad.getAlleles().contains(gChild.getAlleles().get(0)));
|
||||
}
|
||||
|
||||
private void createInheritanceMap(){
|
||||
|
||||
inheritance = new EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>>(Genotype.Type.class);
|
||||
for(Genotype.Type mType : Genotype.Type.values()){
|
||||
inheritance.put(mType, new EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>(Genotype.Type.class));
|
||||
for(Genotype.Type dType : Genotype.Type.values()){
|
||||
inheritance.get(mType).put(dType, new EnumMap<Genotype.Type,Integer>(Genotype.Type.class));
|
||||
for(Genotype.Type cType : Genotype.Type.values()){
|
||||
inheritance.get(mType).get(dType).put(cType, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void clearInheritanceMap(){
|
||||
for(Genotype.Type mType : Genotype.Type.values()){
|
||||
for(Genotype.Type dType : Genotype.Type.values()){
|
||||
for(Genotype.Type cType : Genotype.Type.values()){
|
||||
inheritance.get(mType).get(dType).put(cType, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the likelihood ratio for a mendelian violation
|
||||
*/
|
||||
public double violationLikelihoodRatio(VariantContext vc) {
|
||||
public double violationLikelihoodRatio(VariantContext vc, String motherId, String fatherId, String childId) {
|
||||
double[] logLikAssignments = new double[27];
|
||||
// the matrix to set up is
|
||||
// MOM DAD CHILD
|
||||
|
|
@ -152,9 +404,9 @@ public class MendelianViolation {
|
|||
// AA AB | AB
|
||||
// |- BB
|
||||
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
|
||||
double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector();
|
||||
double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector();
|
||||
double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector();
|
||||
double[] momGL = vc.getGenotype(motherId).getLikelihoods().getAsVector();
|
||||
double[] dadGL = vc.getGenotype(fatherId).getLikelihoods().getAsVector();
|
||||
double[] childGL = vc.getGenotype(childId).getLikelihoods().getAsVector();
|
||||
int offset = 0;
|
||||
for ( int oMom = 0; oMom < 3; oMom++ ) {
|
||||
for ( int oDad = 0; oDad < 3; oDad++ ) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue