SelectVariants: now keeps the YAML stuff internal... it's there if you wanna use it, but won't be published anymore. Official parameter is the string for now.
VariantEval: now sports the new MendelianViolation utility class. MendelianViolationClassifier: I noticed I had broken chartl's walker by changing VariantEval, so I took the liberty to modify it to use the new library too, though I kept modifications to a minimum, could have gone into full integration if this is a useful tool, but since it's in oneoffs, I decided not to go all out. MendelianViolation: Some getter methods were added for chartl and VariantEval. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5447 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
653fb09bb7
commit
4b9b767eb1
|
|
@ -9,6 +9,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
@ -47,7 +48,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
|
||||
@DataPoint(description = "Number of mendelian variants found")
|
||||
long nVariants;
|
||||
|
||||
@DataPoint(description = "Number of mendelian violations found")
|
||||
long nViolations;
|
||||
|
||||
|
|
@ -60,47 +60,17 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
@DataPoint(description = "number of child hom variant calls where the parent was hom ref")
|
||||
long KidHomVar_ParentHomRef;
|
||||
|
||||
TrioStructure trio;
|
||||
double mendelianViolationQualThreshold;
|
||||
MendelianViolation mv;
|
||||
|
||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||
|
||||
public static class TrioStructure {
|
||||
public String mom, dad, child;
|
||||
}
|
||||
|
||||
public static TrioStructure parseTrioDescription(String family) {
|
||||
Matcher m = FAMILY_PATTERN.matcher(family);
|
||||
if (m.matches()) {
|
||||
TrioStructure trio = new TrioStructure();
|
||||
//System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE);
|
||||
trio.mom = m.group(1);
|
||||
trio.dad = m.group(2);
|
||||
trio.child = m.group(3);
|
||||
return trio;
|
||||
} else {
|
||||
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
|
||||
}
|
||||
}
|
||||
|
||||
// todo: fix
|
||||
public void initialize(VariantEvalWalker walker) {
|
||||
trio = parseTrioDescription(walker.getFamilyStructure());
|
||||
mendelianViolationQualThreshold = walker.getMendelianViolationQualThreshold();
|
||||
mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold());
|
||||
}
|
||||
|
||||
|
||||
public boolean enabled() {
|
||||
//return getVEWalker().FAMILY_STRUCTURE != null;
|
||||
return true;
|
||||
}
|
||||
|
||||
private double getQThreshold() {
|
||||
//return getVEWalker().MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred
|
||||
return mendelianViolationQualThreshold / 10; // we aren't 10x scaled in the GATK a la phred
|
||||
//return 0.0;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return "mendelian_violations";
|
||||
}
|
||||
|
|
@ -111,19 +81,14 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
|
||||
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci
|
||||
Genotype momG = vc.getGenotype(trio.mom);
|
||||
Genotype dadG = vc.getGenotype(trio.dad);
|
||||
Genotype childG = vc.getGenotype(trio.child);
|
||||
|
||||
if (includeGenotype(momG) && includeGenotype(dadG) && includeGenotype(childG)) {
|
||||
if (mv.setAlleles(vc)) {
|
||||
nVariants++;
|
||||
|
||||
if (momG == null || dadG == null || childG == null)
|
||||
throw new IllegalArgumentException(String.format("VariantContext didn't contain genotypes for expected trio members: mom=%s dad=%s child=%s", trio.mom, trio.dad, trio.child));
|
||||
Genotype momG = vc.getGenotype(mv.getSampleMom());
|
||||
Genotype dadG = vc.getGenotype(mv.getSampleDad());
|
||||
Genotype childG = vc.getGenotype(mv.getSampleChild());
|
||||
|
||||
// all genotypes are good, so let's see if child is a violation
|
||||
|
||||
if (isViolation(vc, momG, dadG, childG)) {
|
||||
if (mv.isViolation()) {
|
||||
nViolations++;
|
||||
|
||||
String label;
|
||||
|
|
@ -151,6 +116,42 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
return null; // we don't capture any intersting sites
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
private double getQThreshold() {
|
||||
//return getVEWalker().MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred
|
||||
return mendelianViolationQualThreshold / 10; // we aren't 10x scaled in the GATK a la phred
|
||||
//return 0.0;
|
||||
}
|
||||
|
||||
TrioStructure trio;
|
||||
double mendelianViolationQualThreshold;
|
||||
|
||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||
|
||||
public static class TrioStructure {
|
||||
public String mom, dad, child;
|
||||
}
|
||||
|
||||
public static TrioStructure parseTrioDescription(String family) {
|
||||
Matcher m = FAMILY_PATTERN.matcher(family);
|
||||
if (m.matches()) {
|
||||
TrioStructure trio = new TrioStructure();
|
||||
//System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE);
|
||||
trio.mom = m.group(1);
|
||||
trio.dad = m.group(2);
|
||||
trio.child = m.group(3);
|
||||
return trio;
|
||||
} else {
|
||||
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
|
||||
}
|
||||
}
|
||||
|
||||
public void initialize(VariantEvalWalker walker) {
|
||||
trio = parseTrioDescription(walker.getFamilyStructure());
|
||||
mendelianViolationQualThreshold = walker.getMendelianViolationQualThreshold();
|
||||
}
|
||||
|
||||
private boolean includeGenotype(Genotype g) {
|
||||
return g.getNegLog10PError() > getQThreshold() && g.isCalled();
|
||||
}
|
||||
|
|
@ -181,4 +182,9 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
*/
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -70,8 +70,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="discordance", shortName = "disc", doc="Output variants that were not called from a ROD comparison track. Use -disc ROD_NAME", required=false)
|
||||
private String discordanceRodName = "";
|
||||
|
||||
@Deprecated
|
||||
@Argument(fullName="family", shortName="fam", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
|
||||
@Argument(fullName="family_structure", shortName="family", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
|
||||
private String FAMILY_STRUCTURE = "";
|
||||
|
||||
@Argument(fullName="mendelianViolation", shortName="mv", doc="output mendelian violation sites only. Sample metadata information will be taken from YAML file (passed with -SM)", required=false)
|
||||
|
|
@ -112,8 +111,10 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
if (MENDELIAN_VIOLATIONS)
|
||||
mv = new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD);
|
||||
else if (!FAMILY_STRUCTURE.isEmpty())
|
||||
else if (!FAMILY_STRUCTURE.isEmpty()) {
|
||||
mv = new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD);
|
||||
MENDELIAN_VIOLATIONS = true;
|
||||
}
|
||||
|
||||
// Initialize VCF header
|
||||
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.MendelianViolationEvaluator;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -58,16 +57,18 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
|||
*********** PRIVATE CLASSES
|
||||
*/
|
||||
|
||||
public class ExtendedTrioStructure extends MendelianViolationEvaluator.TrioStructure {
|
||||
public class ExtendedTrioStructure {
|
||||
public String mom, dad, child;
|
||||
public HashMap<String,HomozygosityRegion> homozygousRegions;
|
||||
public HashMap<String,Integer> homozygousRegionCounts;
|
||||
public HashMap<String,MendelianInfoKey> regionKeys;
|
||||
public org.broadinstitute.sting.utils.MendelianViolation mvObject;
|
||||
|
||||
public ExtendedTrioStructure(String family) {
|
||||
MendelianViolationEvaluator.TrioStructure struct = MendelianViolationEvaluator.parseTrioDescription(family);
|
||||
this.child = struct.child;
|
||||
this.mom = struct.mom;
|
||||
this.dad = struct.dad;
|
||||
mvObject = new org.broadinstitute.sting.utils.MendelianViolation(family, 0);
|
||||
this.child = mvObject.getSampleChild();
|
||||
this.mom = mvObject.getSampleMom();
|
||||
this.dad = mvObject.getSampleDad();
|
||||
homozygousRegions = new HashMap<String,HomozygosityRegion>(3);
|
||||
homozygousRegionCounts = new HashMap<String,Integer>(3);
|
||||
homozygousRegions.put(child,null);
|
||||
|
|
@ -414,7 +415,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
|||
private MendelianViolation assessViolation(VariantContext varContext, RefMetaDataTracker tracker, ReferenceContext reference, AlignmentContext context) {
|
||||
MendelianViolation violation;
|
||||
if ( varContext != null ) {
|
||||
if ( isComplete(varContext) && MendelianViolationEvaluator.isViolation(varContext,trioStructure) ) {
|
||||
if ( isComplete(varContext) && trioStructure.mvObject.isViolation(varContext) ) {
|
||||
if ( isDeNovo(varContext) ) {
|
||||
violation = assessDeNovo(varContext,tracker,reference,context);
|
||||
} else if ( isOppositeHomozygote(varContext) ) {
|
||||
|
|
|
|||
|
|
@ -17,6 +17,8 @@ import java.util.regex.Pattern;
|
|||
*/
|
||||
public class MendelianViolation {
|
||||
|
||||
|
||||
|
||||
String sampleMom;
|
||||
String sampleDad;
|
||||
String sampleChild;
|
||||
|
|
@ -29,6 +31,23 @@ public class MendelianViolation {
|
|||
|
||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||
|
||||
|
||||
public String getSampleMom() {
|
||||
return sampleMom;
|
||||
}
|
||||
|
||||
public String getSampleDad() {
|
||||
return sampleDad;
|
||||
}
|
||||
|
||||
public String getSampleChild() {
|
||||
return sampleChild;
|
||||
}
|
||||
|
||||
public double getMinGenotypeQuality() {
|
||||
return minGenotypeQuality;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param sampleMomP - sample name of mom
|
||||
|
|
@ -97,10 +116,13 @@ public class MendelianViolation {
|
|||
|
||||
|
||||
/**
|
||||
* This method prepares the object to evaluate for violation. Typically you won't call it directly, a call to
|
||||
* isViolation(vc) will take care of this. But if you want to know whether your site was a valid comparison site
|
||||
* before evaluating it for mendelian violation, you can call setAlleles and then isViolation().
|
||||
* @param vc - the variant context to extract the genotypes and alleles for mom, dad and child.
|
||||
* @return false if couldn't find the genotypes or context has empty alleles. True otherwise.
|
||||
*/
|
||||
private boolean setAlleles (VariantContext vc)
|
||||
public boolean setAlleles (VariantContext vc)
|
||||
{
|
||||
Genotype gMom = vc.getGenotypes(sampleMom).get(sampleMom);
|
||||
Genotype gDad = vc.getGenotypes(sampleDad).get(sampleDad);
|
||||
|
|
@ -138,7 +160,7 @@ public class MendelianViolation {
|
|||
/**
|
||||
* @return whether or not there is a mendelian violation at the site.
|
||||
*/
|
||||
private boolean isViolation() {
|
||||
public boolean isViolation() {
|
||||
if (allelesMom.contains(allelesChild.get(0)) && allelesDad.contains(allelesChild.get(1)) ||
|
||||
allelesMom.contains(allelesChild.get(1)) && allelesDad.contains(allelesChild.get(0)))
|
||||
return false;
|
||||
|
|
|
|||
Loading…
Reference in New Issue