Adapted MendelianViolation to the new ped family representation. Adapted all classes using MendelianViolation too.
MendelianViolationEvaluator was added a number of useful metrics on allele transmission and MVs
This commit is contained in:
parent
e877db8f42
commit
795c99d693
|
|
@ -3,22 +3,18 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||||
|
import org.broadinstitute.sting.gatk.samples.SampleDB;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
|
||||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFilterHeaderLine;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.*;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -30,23 +26,26 @@ import java.util.Map;
|
||||||
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||||
|
|
||||||
private MendelianViolation mendelianViolation = null;
|
private MendelianViolation mendelianViolation = null;
|
||||||
|
private String motherId;
|
||||||
|
private String fatherId;
|
||||||
|
private String childId;
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
if ( mendelianViolation == null ) {
|
if ( mendelianViolation == null ) {
|
||||||
if ( walker instanceof VariantAnnotator && ((VariantAnnotator) walker).familyStr != null) {
|
if (checkAndSetSamples(((VariantAnnotator) walker).getSampleDB())) {
|
||||||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).familyStr, ((VariantAnnotator)walker).minGenotypeQualityP );
|
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid Family String file (-family) on the command line.");
|
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line containing only 1 trio.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Map<String,Object> toRet = new HashMap<String,Object>(1);
|
Map<String,Object> toRet = new HashMap<String,Object>(1);
|
||||||
boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) && vc.getGenotype(mendelianViolation.getSampleChild()).hasLikelihoods() &&
|
boolean hasAppropriateGenotypes = vc.hasGenotype(motherId) && vc.getGenotype(motherId).hasLikelihoods() &&
|
||||||
vc.hasGenotype(mendelianViolation.getSampleDad()) && vc.getGenotype(mendelianViolation.getSampleDad()).hasLikelihoods() &&
|
vc.hasGenotype(fatherId) && vc.getGenotype(fatherId).hasLikelihoods() &&
|
||||||
vc.hasGenotype(mendelianViolation.getSampleMom()) && vc.getGenotype(mendelianViolation.getSampleMom()).hasLikelihoods();
|
vc.hasGenotype(childId) && vc.getGenotype(childId).hasLikelihoods();
|
||||||
if ( hasAppropriateGenotypes )
|
if ( hasAppropriateGenotypes )
|
||||||
toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc));
|
toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc,motherId,fatherId,childId));
|
||||||
|
|
||||||
return toRet;
|
return toRet;
|
||||||
}
|
}
|
||||||
|
|
@ -55,4 +54,27 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
|
||||||
public List<String> getKeyNames() { return Arrays.asList("MVLR"); }
|
public List<String> getKeyNames() { return Arrays.asList("MVLR"); }
|
||||||
|
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
|
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
|
||||||
|
|
||||||
|
private boolean checkAndSetSamples(SampleDB db){
|
||||||
|
Set<String> families = db.getFamilyIDs();
|
||||||
|
if(families.size() != 1)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
Set<Sample> family = db.getFamily(families.iterator().next());
|
||||||
|
if(family.size() != 3)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
Iterator<Sample> sampleIter = family.iterator();
|
||||||
|
Sample sample;
|
||||||
|
for(sample = sampleIter.next();sampleIter.hasNext();sample=sampleIter.next()){
|
||||||
|
if(sample.getParents().size()==2){
|
||||||
|
motherId = sample.getMaternalID();
|
||||||
|
fatherId = sample.getPaternalID();
|
||||||
|
childId = sample.getID();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -7,9 +9,11 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import java.io.PrintStream;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Mendelian violation detection and counting
|
* Mendelian violation detection and counting
|
||||||
|
|
@ -40,12 +44,25 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
@Analysis(name = "Mendelian Violation Evaluator", description = "Mendelian Violation Evaluator")
|
@Analysis(name = "Mendelian Violation Evaluator", description = "Mendelian Violation Evaluator")
|
||||||
public class MendelianViolationEvaluator extends VariantEvaluator {
|
public class MendelianViolationEvaluator extends VariantEvaluator {
|
||||||
|
|
||||||
@DataPoint(description = "Number of mendelian variants found")
|
@DataPoint(description = "Number of variants found with at least one family having genotypes")
|
||||||
long nVariants;
|
long nVariants;
|
||||||
|
@DataPoint(description = "Number of variants found with no family having genotypes -- these sites do not count in the nNoCall")
|
||||||
|
long nSkipped;
|
||||||
|
@DataPoint(description="Number of variants x families called (no missing genotype or lowqual)")
|
||||||
|
long nFamCalled;
|
||||||
|
@DataPoint(description="Number of variants x families called (no missing genotype or lowqual) that contain at least one var allele.")
|
||||||
|
long nVarFamCalled;
|
||||||
|
@DataPoint(description="Number of variants x families discarded as low quality")
|
||||||
|
long nLowQual;
|
||||||
|
@DataPoint(description="Number of variants x families discarded as no call")
|
||||||
|
long nNoCall;
|
||||||
|
@DataPoint(description="Number of loci with mendelian violations")
|
||||||
|
long nLociViolations;
|
||||||
@DataPoint(description = "Number of mendelian violations found")
|
@DataPoint(description = "Number of mendelian violations found")
|
||||||
long nViolations;
|
long nViolations;
|
||||||
|
|
||||||
@DataPoint(description = "number of child hom ref calls where the parent was hom variant")
|
|
||||||
|
/*@DataPoint(description = "number of child hom ref calls where the parent was hom variant")
|
||||||
long KidHomRef_ParentHomVar;
|
long KidHomRef_ParentHomVar;
|
||||||
@DataPoint(description = "number of child het calls where the parent was hom ref")
|
@DataPoint(description = "number of child het calls where the parent was hom ref")
|
||||||
long KidHet_ParentsHomRef;
|
long KidHet_ParentsHomRef;
|
||||||
|
|
@ -53,11 +70,65 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
||||||
long KidHet_ParentsHomVar;
|
long KidHet_ParentsHomVar;
|
||||||
@DataPoint(description = "number of child hom variant calls where the parent was hom ref")
|
@DataPoint(description = "number of child hom variant calls where the parent was hom ref")
|
||||||
long KidHomVar_ParentHomRef;
|
long KidHomVar_ParentHomRef;
|
||||||
|
*/
|
||||||
|
|
||||||
|
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HOM_VAR")
|
||||||
|
long mvRefRef_Var;
|
||||||
|
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HET")
|
||||||
|
long mvRefRef_Het;
|
||||||
|
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HET -> HOM_VAR")
|
||||||
|
long mvRefHet_Var;
|
||||||
|
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_VAR")
|
||||||
|
long mvRefVar_Var;
|
||||||
|
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_REF")
|
||||||
|
long mvRefVar_Ref;
|
||||||
|
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HET -> HOM_REF")
|
||||||
|
long mvVarHet_Ref;
|
||||||
|
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HOM_REF")
|
||||||
|
long mvVarVar_Ref;
|
||||||
|
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HET")
|
||||||
|
long mvVarVar_Het;
|
||||||
|
|
||||||
|
|
||||||
|
/*@DataPoint(description ="Number of inherited var alleles from het parents")
|
||||||
|
long nInheritedVar;
|
||||||
|
@DataPoint(description ="Number of inherited ref alleles from het parents")
|
||||||
|
long nInheritedRef;*/
|
||||||
|
|
||||||
|
@DataPoint(description="Number of HomRef/HomRef/HomRef trios")
|
||||||
|
long HomRefHomRef_HomRef;
|
||||||
|
@DataPoint(description="Number of Het/Het/Het trios")
|
||||||
|
long HetHet_Het;
|
||||||
|
@DataPoint(description="Number of Het/Het/HomRef trios")
|
||||||
|
long HetHet_HomRef;
|
||||||
|
@DataPoint(description="Number of Het/Het/HomVar trios")
|
||||||
|
long HetHet_HomVar;
|
||||||
|
@DataPoint(description="Number of HomVar/HomVar/HomVar trios")
|
||||||
|
long HomVarHomVar_HomVar;
|
||||||
|
@DataPoint(description="Number of HomRef/HomVar/Het trios")
|
||||||
|
long HomRefHomVAR_Het;
|
||||||
|
@DataPoint(description="Number of ref alleles inherited from het/het parents")
|
||||||
|
long HetHet_inheritedRef;
|
||||||
|
@DataPoint(description="Number of var alleles inherited from het/het parents")
|
||||||
|
long HetHet_inheritedVar;
|
||||||
|
@DataPoint(description="Number of ref alleles inherited from homRef/het parents")
|
||||||
|
long HomRefHet_inheritedRef;
|
||||||
|
@DataPoint(description="Number of var alleles inherited from homRef/het parents")
|
||||||
|
long HomRefHet_inheritedVar;
|
||||||
|
@DataPoint(description="Number of ref alleles inherited from homVar/het parents")
|
||||||
|
long HomVarHet_inheritedRef;
|
||||||
|
@DataPoint(description="Number of var alleles inherited from homVar/het parents")
|
||||||
|
long HomVarHet_inheritedVar;
|
||||||
|
|
||||||
MendelianViolation mv;
|
MendelianViolation mv;
|
||||||
|
PrintStream mvFile;
|
||||||
|
Map<String,Set<Sample>> families;
|
||||||
|
|
||||||
public void initialize(VariantEvalWalker walker) {
|
public void initialize(VariantEvalWalker walker) {
|
||||||
mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold());
|
//Changed by Laurent Francioli - 2011-06-07
|
||||||
|
//mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold());
|
||||||
|
mv = new MendelianViolation(walker.getMendelianViolationQualThreshold(),false);
|
||||||
|
families = walker.getSampleDB().getFamilies();
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean enabled() {
|
public boolean enabled() {
|
||||||
|
|
@ -75,110 +146,48 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
||||||
|
|
||||||
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci
|
if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci
|
||||||
if (mv.setAlleles(vc)) {
|
|
||||||
|
if(mv.countViolations(families,vc)>0){
|
||||||
|
nLociViolations++;
|
||||||
|
nViolations += mv.getViolationsCount();
|
||||||
|
mvRefRef_Var += mv.getParentsRefRefChildVar();
|
||||||
|
mvRefRef_Het += mv.getParentsRefRefChildHet();
|
||||||
|
mvRefHet_Var += mv.getParentsRefHetChildVar();
|
||||||
|
mvRefVar_Var += mv.getParentsRefVarChildVar();
|
||||||
|
mvRefVar_Ref += mv.getParentsRefVarChildRef();
|
||||||
|
mvVarHet_Ref += mv.getParentsVarHetChildRef();
|
||||||
|
mvVarVar_Ref += mv.getParentsVarVarChildRef();
|
||||||
|
mvVarVar_Het += mv.getParentsVarVarChildHet();
|
||||||
|
|
||||||
|
}
|
||||||
|
HomRefHomRef_HomRef += mv.getRefRefRef();
|
||||||
|
HetHet_Het += mv.getHetHetHet();
|
||||||
|
HetHet_HomRef += mv.getHetHetHomRef();
|
||||||
|
HetHet_HomVar += mv.getHetHetHomVar();
|
||||||
|
HomVarHomVar_HomVar += mv.getVarVarVar();
|
||||||
|
HomRefHomVAR_Het += mv.getRefVarHet();
|
||||||
|
HetHet_inheritedRef += mv.getParentsHetHetInheritedRef();
|
||||||
|
HetHet_inheritedVar += mv.getParentsHetHetInheritedVar();
|
||||||
|
HomRefHet_inheritedRef += mv.getParentsRefHetInheritedRef();
|
||||||
|
HomRefHet_inheritedVar += mv.getParentsRefHetInheritedVar();
|
||||||
|
HomVarHet_inheritedRef += mv.getParentsVarHetInheritedRef();
|
||||||
|
HomVarHet_inheritedVar += mv.getParentsVarHetInheritedVar();
|
||||||
|
|
||||||
|
if(mv.getFamilyCalledCount()>0){
|
||||||
nVariants++;
|
nVariants++;
|
||||||
|
nFamCalled += mv.getFamilyCalledCount();
|
||||||
Genotype momG = vc.getGenotype(mv.getSampleMom());
|
nLowQual += mv.getFamilyLowQualsCount();
|
||||||
Genotype dadG = vc.getGenotype(mv.getSampleDad());
|
nNoCall += mv.getFamilyNoCallCount();
|
||||||
Genotype childG = vc.getGenotype(mv.getSampleChild());
|
nVarFamCalled += mv.getVarFamilyCalledCount();
|
||||||
|
|
||||||
if (mv.isViolation()) {
|
|
||||||
nViolations++;
|
|
||||||
|
|
||||||
String label;
|
|
||||||
if (childG.isHomRef() && (momG.isHomVar() || dadG.isHomVar())) {
|
|
||||||
label = "KidHomRef_ParentHomVar";
|
|
||||||
KidHomRef_ParentHomVar++;
|
|
||||||
} else if (childG.isHet() && (momG.isHomRef() && dadG.isHomRef())) {
|
|
||||||
label = "KidHet_ParentsHomRef";
|
|
||||||
KidHet_ParentsHomRef++;
|
|
||||||
} else if (childG.isHet() && (momG.isHomVar() && dadG.isHomVar())) {
|
|
||||||
label = "KidHet_ParentsHomVar";
|
|
||||||
KidHet_ParentsHomVar++;
|
|
||||||
} else if (childG.isHomVar() && (momG.isHomRef() || dadG.isHomRef())) {
|
|
||||||
label = "KidHomVar_ParentHomRef";
|
|
||||||
KidHomVar_ParentHomRef++;
|
|
||||||
} else {
|
|
||||||
throw new ReviewedStingException("BUG: unexpected child genotype class " + childG);
|
|
||||||
}
|
|
||||||
|
|
||||||
return "MendelViolation=" + label;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
else{
|
||||||
|
nSkipped++;
|
||||||
return null; // we don't capture any intersting sites
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
private double getQThreshold() {
|
|
||||||
//return getVEWalker().MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred
|
|
||||||
return mendelianViolationQualThreshold / 10; // we aren't 10x scaled in the GATK a la phred
|
|
||||||
//return 0.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
TrioStructure trio;
|
|
||||||
double mendelianViolationQualThreshold;
|
|
||||||
|
|
||||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
|
||||||
|
|
||||||
public static class TrioStructure {
|
|
||||||
public String mom, dad, child;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static TrioStructure parseTrioDescription(String family) {
|
|
||||||
Matcher m = FAMILY_PATTERN.matcher(family);
|
|
||||||
if (m.matches()) {
|
|
||||||
TrioStructure trio = new TrioStructure();
|
|
||||||
//System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE);
|
|
||||||
trio.mom = m.group(1);
|
|
||||||
trio.dad = m.group(2);
|
|
||||||
trio.child = m.group(3);
|
|
||||||
return trio;
|
|
||||||
} else {
|
|
||||||
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void initialize(VariantEvalWalker walker) {
|
|
||||||
trio = parseTrioDescription(walker.getFamilyStructure());
|
|
||||||
mendelianViolationQualThreshold = walker.getMendelianViolationQualThreshold();
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean includeGenotype(Genotype g) {
|
|
||||||
return g.getLog10PError() > getQThreshold() && g.isCalled();
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isViolation(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG) {
|
|
||||||
return isViolation(vc, momG.getAlleles(), dadG.getAlleles(), childG.getAlleles());
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isViolation(VariantContext vc, TrioStructure trio ) {
|
|
||||||
return isViolation(vc, vc.getGenotype(trio.mom), vc.getGenotype(trio.dad), vc.getGenotype(trio.child) );
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isViolation(VariantContext vc, List<Allele> momA, List<Allele> dadA, List<Allele> childA) {
|
|
||||||
//VariantContext momVC = vc.subContextFromGenotypes(momG);
|
|
||||||
//VariantContext dadVC = vc.subContextFromGenotypes(dadG);
|
|
||||||
int i = 0;
|
|
||||||
Genotype childG = new Genotype("kidG", childA);
|
|
||||||
for (Allele momAllele : momA) {
|
|
||||||
for (Allele dadAllele : dadA) {
|
|
||||||
if (momAllele.isCalled() && dadAllele.isCalled()) {
|
|
||||||
Genotype possibleChild = new Genotype("possibleGenotype" + i, Arrays.asList(momAllele, dadAllele));
|
|
||||||
if (childG.sameGenotype(possibleChild)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return null; // we don't capture any interesting sites
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,9 +26,9 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.*;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||||
|
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
|
@ -41,7 +41,6 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -282,6 +281,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
@Argument(fullName="select_random_fraction", shortName="fraction", doc="Selects a fraction (a number between 0 and 1) of the total variants at random from the variant track", required=false)
|
@Argument(fullName="select_random_fraction", shortName="fraction", doc="Selects a fraction (a number between 0 and 1) of the total variants at random from the variant track", required=false)
|
||||||
private double fractionRandom = 0;
|
private double fractionRandom = 0;
|
||||||
|
|
||||||
|
@Argument(fullName="remove_fraction_genotypes", shortName="fractionGenotypes", doc="Selects a fraction (a number between 0 and 1) of the total genotypes at random from the variant track and sets them to nocall", required=false)
|
||||||
|
private double fractionGenotypes = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria.
|
* This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria.
|
||||||
* When specified one or more times, a particular type of variant is selected.
|
* When specified one or more times, a particular type of variant is selected.
|
||||||
|
|
@ -325,7 +327,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
private boolean DISCORDANCE_ONLY = false;
|
private boolean DISCORDANCE_ONLY = false;
|
||||||
private boolean CONCORDANCE_ONLY = false;
|
private boolean CONCORDANCE_ONLY = false;
|
||||||
|
|
||||||
private Set<MendelianViolation> mvSet = new HashSet<MendelianViolation>();
|
private MendelianViolation mv;
|
||||||
|
|
||||||
|
|
||||||
/* variables used by the SELECT RANDOM modules */
|
/* variables used by the SELECT RANDOM modules */
|
||||||
|
|
@ -344,6 +346,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
private PrintStream outMVFileStream = null;
|
private PrintStream outMVFileStream = null;
|
||||||
|
|
||||||
|
//Random number generator for the genotypes to remove
|
||||||
|
private Random randomGenotypes = new Random();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
||||||
|
|
@ -380,8 +384,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
for ( String sample : samples )
|
for ( String sample : samples )
|
||||||
logger.info("Including sample '" + sample + "'");
|
logger.info("Including sample '" + sample + "'");
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// if user specified types to include, add these, otherwise, add all possible variant context types to list of vc types to include
|
// if user specified types to include, add these, otherwise, add all possible variant context types to list of vc types to include
|
||||||
if (TYPES_TO_INCLUDE.isEmpty()) {
|
if (TYPES_TO_INCLUDE.isEmpty()) {
|
||||||
|
|
||||||
|
|
@ -421,29 +423,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getName());
|
if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getName());
|
||||||
|
|
||||||
if (MENDELIAN_VIOLATIONS) {
|
if (MENDELIAN_VIOLATIONS) {
|
||||||
if ( FAMILY_STRUCTURE_FILE != null) {
|
mv = new MendelianViolation(MENDELIAN_VIOLATION_QUAL_THRESHOLD,false,true);
|
||||||
try {
|
|
||||||
for ( final String line : new XReadLines( FAMILY_STRUCTURE_FILE ) ) {
|
|
||||||
MendelianViolation mv = new MendelianViolation(line, MENDELIAN_VIOLATION_QUAL_THRESHOLD);
|
|
||||||
if (samples.contains(mv.getSampleChild()) && samples.contains(mv.getSampleDad()) && samples.contains(mv.getSampleMom()))
|
|
||||||
mvSet.add(mv);
|
|
||||||
}
|
|
||||||
} catch ( FileNotFoundException e ) {
|
|
||||||
throw new UserException.CouldNotReadInputFile(FAMILY_STRUCTURE_FILE, e);
|
|
||||||
}
|
|
||||||
if (outMVFile != null)
|
|
||||||
try {
|
|
||||||
outMVFileStream = new PrintStream(outMVFile);
|
|
||||||
}
|
|
||||||
catch (FileNotFoundException e) {
|
|
||||||
throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); }
|
|
||||||
}
|
|
||||||
else
|
|
||||||
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
|
||||||
}
|
|
||||||
else if (!FAMILY_STRUCTURE.isEmpty()) {
|
|
||||||
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
|
||||||
MENDELIAN_VIOLATIONS = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SELECT_RANDOM_NUMBER = numRandom > 0;
|
SELECT_RANDOM_NUMBER = numRandom > 0;
|
||||||
|
|
@ -479,26 +459,26 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
for (VariantContext vc : vcs) {
|
for (VariantContext vc : vcs) {
|
||||||
if (MENDELIAN_VIOLATIONS) {
|
if (MENDELIAN_VIOLATIONS && mv.countViolations(this.getSampleDB().getFamilies(samples),vc) < 1)
|
||||||
boolean foundMV = false;
|
break;
|
||||||
for (MendelianViolation mv : mvSet) {
|
|
||||||
if (mv.isViolation(vc)) {
|
if (outMVFile != null){
|
||||||
foundMV = true;
|
for( String familyId : mv.getViolationFamilies()){
|
||||||
//System.out.println(vc.toString());
|
for(Sample sample : this.getSampleDB().getFamily(familyId)){
|
||||||
if (outMVFile != null)
|
if(sample.getParents().size() > 0){
|
||||||
outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
|
outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
|
||||||
"childG=%s childGL=%s\n",vc.getChr(), vc.getStart(),
|
"childG=%s childGL=%s\n",vc.getChr(), vc.getStart(),
|
||||||
vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)),
|
vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)),
|
||||||
mv.getSampleMom(), mv.getSampleDad(), mv.getSampleChild(),
|
sample.getMaternalID(), sample.getPaternalID(), sample.getID(),
|
||||||
vc.getGenotype(mv.getSampleMom()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
|
vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(),
|
||||||
vc.getGenotype(mv.getSampleDad()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
|
vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(),
|
||||||
vc.getGenotype(mv.getSampleChild()).toBriefString(),vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString() );
|
vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString() );
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!foundMV)
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (DISCORDANCE_ONLY) {
|
if (DISCORDANCE_ONLY) {
|
||||||
Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation());
|
Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation());
|
||||||
if (!isDiscordant(vc, compVCs))
|
if (!isDiscordant(vc, compVCs))
|
||||||
|
|
@ -657,9 +637,31 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
final VariantContext sub = vc.subContextFromSamples(samples, vc.getAlleles());
|
final VariantContext sub = vc.subContextFromSamples(samples, vc.getAlleles());
|
||||||
VariantContextBuilder builder = new VariantContextBuilder(sub);
|
VariantContextBuilder builder = new VariantContextBuilder(sub);
|
||||||
|
|
||||||
|
GenotypesContext newGC = sub.getGenotypes();
|
||||||
|
|
||||||
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate)
|
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate)
|
||||||
if ( vc.getAlleles().size() != sub.getAlleles().size() )
|
if ( vc.getAlleles().size() != sub.getAlleles().size() )
|
||||||
builder.genotypes(VariantContextUtils.stripPLs(vc.getGenotypes()));
|
newGC = VariantContextUtils.stripPLs(sub.getGenotypes());
|
||||||
|
|
||||||
|
//Remove a fraction of the genotypes if needed
|
||||||
|
if(fractionGenotypes>0){
|
||||||
|
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
|
||||||
|
for ( Genotype genotype : newGC ) {
|
||||||
|
//Set genotype to no call if it falls in the fraction.
|
||||||
|
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
|
||||||
|
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
|
||||||
|
alleles.add(Allele.create((byte)'.'));
|
||||||
|
alleles.add(Allele.create((byte)'.'));
|
||||||
|
genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap<String, Object>(),false));
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
genotypes.add(genotype);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
newGC = GenotypesContext.create(genotypes);
|
||||||
|
}
|
||||||
|
|
||||||
|
builder.genotypes(newGC);
|
||||||
|
|
||||||
int depth = 0;
|
int depth = 0;
|
||||||
for (String sample : sub.getSampleNames()) {
|
for (String sample : sub.getSampleNames()) {
|
||||||
|
|
|
||||||
|
|
@ -1,147 +1,399 @@
|
||||||
package org.broadinstitute.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.*;
|
||||||
import java.util.List;
|
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* User: carneiro
|
* User: carneiro / lfran
|
||||||
* Date: 3/9/11
|
* Date: 3/9/11
|
||||||
* Time: 12:38 PM
|
* Time: 12:38 PM
|
||||||
|
*
|
||||||
|
* Class for the identification and tracking of mendelian violation. It can be used in 2 distinct ways:
|
||||||
|
* - Either using an instance of the MendelianViolation class to track mendelian violations for each of the families while
|
||||||
|
* walking over the variants
|
||||||
|
* - Or using the static methods to directly get information about mendelian violation in a family at a given locus
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
public class MendelianViolation {
|
public class MendelianViolation {
|
||||||
String sampleMom;
|
//List of families with violations
|
||||||
String sampleDad;
|
private List<String> violationFamilies;
|
||||||
String sampleChild;
|
|
||||||
|
|
||||||
List allelesMom;
|
//Call information
|
||||||
List allelesDad;
|
private int nocall = 0;
|
||||||
List allelesChild;
|
private int familyCalled = 0;
|
||||||
|
private int varFamilyCalled = 0;
|
||||||
|
private int lowQual = 0;
|
||||||
|
|
||||||
double minGenotypeQuality;
|
private boolean allCalledOnly = true;
|
||||||
|
|
||||||
|
//Stores occurrences of inheritance
|
||||||
|
private EnumMap<Genotype.Type, EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>> inheritance;
|
||||||
|
|
||||||
|
private int violations_total=0;
|
||||||
|
|
||||||
|
private double minGenotypeQuality;
|
||||||
|
|
||||||
|
private boolean abortOnSampleNotFound;
|
||||||
|
|
||||||
|
//Number of families with genotype information for all members
|
||||||
|
public int getFamilyCalledCount(){
|
||||||
|
return familyCalled;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Number of families with genotype information for all members
|
||||||
|
public int getVarFamilyCalledCount(){
|
||||||
|
return varFamilyCalled;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Number of families missing genotypes for one or more of their members
|
||||||
|
public int getFamilyNoCallCount(){
|
||||||
|
return nocall;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Number of families with genotypes below the set quality threshold
|
||||||
|
public int getFamilyLowQualsCount(){
|
||||||
|
return lowQual;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getViolationsCount(){
|
||||||
|
return violations_total;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of alt alleles inherited from het parents (no violation)
|
||||||
|
public int getParentHetInheritedVar(){
|
||||||
|
return getParentsHetHetInheritedVar() + getParentsRefHetInheritedVar() + getParentsVarHetInheritedVar();
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of ref alleles inherited from het parents (no violation)
|
||||||
|
public int getParentHetInheritedRef(){
|
||||||
|
return getParentsHetHetInheritedRef() + getParentsRefHetInheritedRef() + getParentsVarHetInheritedRef();
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of HomRef/HomRef/HomRef trios
|
||||||
|
public int getRefRefRef(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of HomVar/HomVar/HomVar trios
|
||||||
|
public int getVarVarVar(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of HomRef/HomVar/Het trios
|
||||||
|
public int getRefVarHet(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET) +
|
||||||
|
inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of Het/Het/Het trios
|
||||||
|
public int getHetHetHet(){
|
||||||
|
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of Het/Het/HomRef trios
|
||||||
|
public int getHetHetHomRef(){
|
||||||
|
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of Het/Het/HomVar trios
|
||||||
|
public int getHetHetHomVar(){
|
||||||
|
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of ref alleles inherited from Het/Het parents (no violation)
|
||||||
|
public int getParentsHetHetInheritedRef(){
|
||||||
|
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||||
|
+ 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||||
|
//return parentsHetHet_childRef;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of var alleles inherited from Het/Het parents (no violation)
|
||||||
|
public int getParentsHetHetInheritedVar(){
|
||||||
|
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||||
|
+ 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR);
|
||||||
|
//return parentsHetHet_childVar;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of ref alleles inherited from HomRef/Het parents (no violation)
|
||||||
|
public int getParentsRefHetInheritedRef(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF)
|
||||||
|
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||||
|
//return parentsHomRefHet_childRef;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of var alleles inherited from HomRef/Het parents (no violation)
|
||||||
|
public int getParentsRefHetInheritedVar(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||||
|
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||||
|
//return parentsHomRefHet_childVar;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of ref alleles inherited from HomVar/Het parents (no violation)
|
||||||
|
public int getParentsVarHetInheritedRef(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HET)
|
||||||
|
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET);
|
||||||
|
//return parentsHomVarHet_childRef;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of var alleles inherited from HomVar/Het parents (no violation)
|
||||||
|
public int getParentsVarHetInheritedVar(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR)
|
||||||
|
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR);
|
||||||
|
//return parentsHomVarHet_childVar;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of violations of the type HOM_REF/HOM_REF -> HOM_VAR
|
||||||
|
public int getParentsRefRefChildVar(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of violations of the type HOM_REF/HOM_REF -> HET
|
||||||
|
public int getParentsRefRefChildHet(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of violations of the type HOM_REF/HET -> HOM_VAR
|
||||||
|
public int getParentsRefHetChildVar(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR)
|
||||||
|
+ inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_VAR
|
||||||
|
public int getParentsRefVarChildVar(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR)
|
||||||
|
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of violations of the type HOM_REF/HOM_VAR -> HOM_REF
|
||||||
|
public int getParentsRefVarChildRef(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF)
|
||||||
|
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of violations of the type HOM_VAR/HET -> HOM_REF
|
||||||
|
public int getParentsVarHetChildRef(){
|
||||||
|
return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF)
|
||||||
|
+ inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of violations of the type HOM_VAR/HOM_VAR -> HOM_REF
|
||||||
|
public int getParentsVarVarChildRef(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of violations of the type HOM_VAR/HOM_VAR -> HET
|
||||||
|
public int getParentsVarVarChildHet(){
|
||||||
|
return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//Count of violations of the type HOM_VAR/? -> HOM_REF
|
||||||
|
public int getParentVarChildRef(){
|
||||||
|
return getParentsRefVarChildRef() + getParentsVarHetChildRef() +getParentsVarVarChildRef();
|
||||||
|
}
|
||||||
|
|
||||||
|
//Count of violations of the type HOM_REF/? -> HOM_VAR
|
||||||
|
public int getParentRefChildVar(){
|
||||||
|
return getParentsRefVarChildVar() + getParentsRefHetChildVar() +getParentsRefRefChildVar();
|
||||||
|
}
|
||||||
|
|
||||||
|
//Returns a String containing all trios where a Mendelian violation was observed.
|
||||||
|
//The String is formatted "mom1+dad1=child1,mom2+dad2=child2,..."
|
||||||
|
public String getViolationFamiliesString(){
|
||||||
|
if(violationFamilies.isEmpty())
|
||||||
|
return "";
|
||||||
|
|
||||||
|
Iterator<String> it = violationFamilies.iterator();
|
||||||
|
String violationFams = it.next();
|
||||||
|
while(it.hasNext()){
|
||||||
|
violationFams += ","+it.next();
|
||||||
|
}
|
||||||
|
return violationFams;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getViolationFamilies(){
|
||||||
|
return violationFamilies;
|
||||||
|
}
|
||||||
|
|
||||||
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
|
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
|
||||||
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
|
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
|
||||||
|
|
||||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
|
||||||
|
|
||||||
public String getSampleMom() {
|
|
||||||
return sampleMom;
|
|
||||||
}
|
|
||||||
public String getSampleDad() {
|
|
||||||
return sampleDad;
|
|
||||||
}
|
|
||||||
public String getSampleChild() {
|
|
||||||
return sampleChild;
|
|
||||||
}
|
|
||||||
public double getMinGenotypeQuality() {
|
public double getMinGenotypeQuality() {
|
||||||
return minGenotypeQuality;
|
return minGenotypeQuality;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
* Constructor
|
||||||
* @param sampleMomP - sample name of mom
|
|
||||||
* @param sampleDadP - sample name of dad
|
|
||||||
* @param sampleChildP - sample name of child
|
|
||||||
*/
|
|
||||||
public MendelianViolation (String sampleMomP, String sampleDadP, String sampleChildP) {
|
|
||||||
sampleMom = sampleMomP;
|
|
||||||
sampleDad = sampleDadP;
|
|
||||||
sampleChild = sampleChildP;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @param family - the sample names string "mom+dad=child"
|
|
||||||
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
|
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
|
||||||
*/
|
|
||||||
public MendelianViolation(String family, double minGenotypeQualityP) {
|
|
||||||
minGenotypeQuality = minGenotypeQualityP;
|
|
||||||
|
|
||||||
Matcher m = FAMILY_PATTERN.matcher(family);
|
|
||||||
if (m.matches()) {
|
|
||||||
sampleMom = m.group(1);
|
|
||||||
sampleDad = m.group(2);
|
|
||||||
sampleChild = m.group(3);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An alternative to the more general constructor if you want to get the Sample information from the engine yourself.
|
|
||||||
* @param sample - the sample object extracted from the sample metadata YAML file given to the engine.
|
|
||||||
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
|
|
||||||
*/
|
|
||||||
public MendelianViolation(Sample sample, double minGenotypeQualityP) {
|
|
||||||
sampleMom = sample.getMother().getID();
|
|
||||||
sampleDad = sample.getFather().getID();
|
|
||||||
sampleChild = sample.getID();
|
|
||||||
minGenotypeQuality = minGenotypeQualityP;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This method prepares the object to evaluate for violation. Typically you won't call it directly, a call to
|
|
||||||
* isViolation(vc) will take care of this. But if you want to know whether your site was a valid comparison site
|
|
||||||
* before evaluating it for mendelian violation, you can call setAlleles and then isViolation().
|
|
||||||
* @param vc - the variant context to extract the genotypes and alleles for mom, dad and child.
|
|
||||||
* @return false if couldn't find the genotypes or context has empty alleles. True otherwise.
|
|
||||||
*/
|
|
||||||
public boolean setAlleles (VariantContext vc)
|
|
||||||
{
|
|
||||||
Genotype gMom = vc.getGenotypes(sampleMom).get(sampleMom);
|
|
||||||
Genotype gDad = vc.getGenotypes(sampleDad).get(sampleDad);
|
|
||||||
Genotype gChild = vc.getGenotypes(sampleChild).get(sampleChild);
|
|
||||||
|
|
||||||
if (gMom == null || gDad == null || gChild == null)
|
|
||||||
throw new IllegalArgumentException(String.format("Variant %s:%d didn't contain genotypes for all family members: mom=%s dad=%s child=%s", vc.getChr(), vc.getStart(), sampleMom, sampleDad, sampleChild));
|
|
||||||
|
|
||||||
if (gMom.isNoCall() || gDad.isNoCall() || gChild.isNoCall() ||
|
|
||||||
gMom.getPhredScaledQual() < minGenotypeQuality ||
|
|
||||||
gDad.getPhredScaledQual() < minGenotypeQuality ||
|
|
||||||
gChild.getPhredScaledQual() < minGenotypeQuality ) {
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
allelesMom = gMom.getAlleles();
|
|
||||||
allelesDad = gDad.getAlleles();
|
|
||||||
allelesChild = gChild.getAlleles();
|
|
||||||
return !allelesMom.isEmpty() && !allelesDad.isEmpty() && !allelesChild.isEmpty();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
*
|
||||||
|
*/
|
||||||
|
public MendelianViolation(double minGenotypeQualityP) {
|
||||||
|
this(minGenotypeQualityP,true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor
|
||||||
|
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
|
||||||
|
* @param abortOnSampleNotFound - Whether to stop execution if a family is passed but no relevant genotypes are found. If false, then the family is ignored.
|
||||||
|
*/
|
||||||
|
public MendelianViolation(double minGenotypeQualityP, boolean abortOnSampleNotFound) {
|
||||||
|
minGenotypeQuality = minGenotypeQualityP;
|
||||||
|
this.abortOnSampleNotFound = abortOnSampleNotFound;
|
||||||
|
violationFamilies = new ArrayList<String>();
|
||||||
|
createInheritanceMap();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor
|
||||||
|
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
|
||||||
|
* @param abortOnSampleNotFound - Whether to stop execution if a family is passed but no relevant genotypes are found. If false, then the family is ignored.
|
||||||
|
* @param completeTriosOnly - whether only complete trios are considered or parent/child pairs are too.
|
||||||
|
*/
|
||||||
|
public MendelianViolation(double minGenotypeQualityP, boolean abortOnSampleNotFound, boolean completeTriosOnly) {
|
||||||
|
minGenotypeQuality = minGenotypeQualityP;
|
||||||
|
this.abortOnSampleNotFound = abortOnSampleNotFound;
|
||||||
|
violationFamilies = new ArrayList<String>();
|
||||||
|
createInheritanceMap();
|
||||||
|
allCalledOnly = completeTriosOnly;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param families the families to be checked for Mendelian violations
|
||||||
* @param vc the variant context to extract the genotypes and alleles for mom, dad and child.
|
* @param vc the variant context to extract the genotypes and alleles for mom, dad and child.
|
||||||
* @return False if we can't determine (lack of information), or it's not a violation. True if it is a violation.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
public boolean isViolation(VariantContext vc)
|
|
||||||
{
|
|
||||||
return setAlleles(vc) && isViolation();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return whether or not there is a mendelian violation at the site.
|
* @return whether or not there is a mendelian violation at the site.
|
||||||
*/
|
*/
|
||||||
public boolean isViolation() {
|
public int countViolations(Map<String, Set<Sample>> families, VariantContext vc){
|
||||||
if (allelesMom.contains(allelesChild.get(0)) && allelesDad.contains(allelesChild.get(1)) ||
|
|
||||||
allelesMom.contains(allelesChild.get(1)) && allelesDad.contains(allelesChild.get(0)))
|
//Reset counts
|
||||||
return false;
|
nocall = 0;
|
||||||
return true;
|
lowQual = 0;
|
||||||
|
familyCalled = 0;
|
||||||
|
varFamilyCalled = 0;
|
||||||
|
violations_total=0;
|
||||||
|
violationFamilies.clear();
|
||||||
|
clearInheritanceMap();
|
||||||
|
|
||||||
|
for(Set<Sample> family : families.values()){
|
||||||
|
Iterator<Sample> sampleIterator = family.iterator();
|
||||||
|
Sample sample;
|
||||||
|
while(sampleIterator.hasNext()){
|
||||||
|
sample = sampleIterator.next();
|
||||||
|
if(sample.getParents().size() > 0)
|
||||||
|
updateViolations(sample.getFamilyID(),sample.getMaternalID(), sample.getPaternalID(), sample.getID() ,vc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return violations_total;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isViolation(Sample mother, Sample father, Sample child, VariantContext vc){
|
||||||
|
|
||||||
|
//Reset counts
|
||||||
|
nocall = 0;
|
||||||
|
lowQual = 0;
|
||||||
|
familyCalled = 0;
|
||||||
|
varFamilyCalled = 0;
|
||||||
|
violations_total=0;
|
||||||
|
violationFamilies.clear();
|
||||||
|
clearInheritanceMap();
|
||||||
|
updateViolations(mother.getFamilyID(),mother.getID(),father.getID(),child.getID(),vc);
|
||||||
|
return violations_total>0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void updateViolations(String familyId, String motherId, String fatherId, String childId, VariantContext vc){
|
||||||
|
|
||||||
|
int count;
|
||||||
|
Genotype gMom = vc.getGenotype(motherId);
|
||||||
|
Genotype gDad = vc.getGenotype(fatherId);
|
||||||
|
Genotype gChild = vc.getGenotype(childId);
|
||||||
|
|
||||||
|
if (gMom == null || gDad == null || gChild == null){
|
||||||
|
if(abortOnSampleNotFound)
|
||||||
|
throw new IllegalArgumentException(String.format("Variant %s:%d: Missing genotypes for family %s: mom=%s dad=%s family=%s", vc.getChr(), vc.getStart(), familyId, motherId, fatherId, childId));
|
||||||
|
else
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
//Count No calls
|
||||||
|
if(allCalledOnly && (!gMom.isCalled() || !gDad.isCalled() || !gChild.isCalled())){
|
||||||
|
nocall++;
|
||||||
|
}
|
||||||
|
else if (!gMom.isCalled() && !gDad.isCalled() || !gChild.isCalled()){
|
||||||
|
nocall++;
|
||||||
|
}
|
||||||
|
//Count lowQual. Note that if min quality is set to 0, even values with no quality associated are returned
|
||||||
|
else if (minGenotypeQuality>0 && (gMom.getPhredScaledQual() < minGenotypeQuality ||
|
||||||
|
gDad.getPhredScaledQual() < minGenotypeQuality ||
|
||||||
|
gChild.getPhredScaledQual() < minGenotypeQuality )) {
|
||||||
|
lowQual++;
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
//Count all families per loci called
|
||||||
|
familyCalled++;
|
||||||
|
//If the family is all homref, not too interesting
|
||||||
|
if(!(gMom.isHomRef() && gDad.isHomRef() && gChild.isHomRef()))
|
||||||
|
{
|
||||||
|
varFamilyCalled++;
|
||||||
|
if(isViolation(gMom, gDad, gChild)){
|
||||||
|
violationFamilies.add(familyId);
|
||||||
|
violations_total++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
count = inheritance.get(gMom.getType()).get(gDad.getType()).get(gChild.getType());
|
||||||
|
inheritance.get(gMom.getType()).get(gDad.getType()).put(gChild.getType(),count+1);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isViolation(Genotype gMom, Genotype gDad, Genotype gChild) {
|
||||||
|
//1 parent is no "call
|
||||||
|
if(!gMom.isCalled()){
|
||||||
|
return (gDad.isHomRef() && gChild.isHomVar()) || (gDad.isHomVar() && gChild.isHomRef());
|
||||||
|
}
|
||||||
|
else if(!gDad.isCalled()){
|
||||||
|
return (gMom.isHomRef() && gChild.isHomVar()) || (gMom.isHomVar() && gChild.isHomRef());
|
||||||
|
}
|
||||||
|
//Both parents have genotype information
|
||||||
|
return !(gMom.getAlleles().contains(gChild.getAlleles().get(0)) && gDad.getAlleles().contains(gChild.getAlleles().get(1)) ||
|
||||||
|
gMom.getAlleles().contains(gChild.getAlleles().get(1)) && gDad.getAlleles().contains(gChild.getAlleles().get(0)));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void createInheritanceMap(){
|
||||||
|
|
||||||
|
inheritance = new EnumMap<Genotype.Type,EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>>(Genotype.Type.class);
|
||||||
|
for(Genotype.Type mType : Genotype.Type.values()){
|
||||||
|
inheritance.put(mType, new EnumMap<Genotype.Type,EnumMap<Genotype.Type,Integer>>(Genotype.Type.class));
|
||||||
|
for(Genotype.Type dType : Genotype.Type.values()){
|
||||||
|
inheritance.get(mType).put(dType, new EnumMap<Genotype.Type,Integer>(Genotype.Type.class));
|
||||||
|
for(Genotype.Type cType : Genotype.Type.values()){
|
||||||
|
inheritance.get(mType).get(dType).put(cType, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void clearInheritanceMap(){
|
||||||
|
for(Genotype.Type mType : Genotype.Type.values()){
|
||||||
|
for(Genotype.Type dType : Genotype.Type.values()){
|
||||||
|
for(Genotype.Type cType : Genotype.Type.values()){
|
||||||
|
inheritance.get(mType).get(dType).put(cType, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return the likelihood ratio for a mendelian violation
|
* @return the likelihood ratio for a mendelian violation
|
||||||
*/
|
*/
|
||||||
public double violationLikelihoodRatio(VariantContext vc) {
|
public double violationLikelihoodRatio(VariantContext vc, String motherId, String fatherId, String childId) {
|
||||||
double[] logLikAssignments = new double[27];
|
double[] logLikAssignments = new double[27];
|
||||||
// the matrix to set up is
|
// the matrix to set up is
|
||||||
// MOM DAD CHILD
|
// MOM DAD CHILD
|
||||||
|
|
@ -152,9 +404,9 @@ public class MendelianViolation {
|
||||||
// AA AB | AB
|
// AA AB | AB
|
||||||
// |- BB
|
// |- BB
|
||||||
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
|
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
|
||||||
double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector();
|
double[] momGL = vc.getGenotype(motherId).getLikelihoods().getAsVector();
|
||||||
double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector();
|
double[] dadGL = vc.getGenotype(fatherId).getLikelihoods().getAsVector();
|
||||||
double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector();
|
double[] childGL = vc.getGenotype(childId).getLikelihoods().getAsVector();
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
for ( int oMom = 0; oMom < 3; oMom++ ) {
|
for ( int oMom = 0; oMom < 3; oMom++ ) {
|
||||||
for ( int oDad = 0; oDad < 3; oDad++ ) {
|
for ( int oDad = 0; oDad < 3; oDad++ ) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue