SelectVariants: now keeps the YAML stuff internal... it's there if you wanna use it, but won't be published anymore. Official parameter is the string for now.

VariantEval: now sports the new MendelianViolation utility class.
MendelianViolationClassifier: I noticed I had broken chartl's walker by changing VariantEval, so I took the liberty to modify it to use the new library too, though I kept modifications to a minimum, could have gone into full integration if this is a useful tool, but since it's in oneoffs, I decided not to go all out.
MendelianViolation: Some getter methods were added for chartl and VariantEval.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5447 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
carneiro 2011-03-15 18:36:55 +00:00
parent 653fb09bb7
commit 4b9b767eb1
4 changed files with 85 additions and 55 deletions

View File

@ -9,6 +9,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Arrays;
@ -47,7 +48,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
@DataPoint(description = "Number of mendelian variants found")
long nVariants;
@DataPoint(description = "Number of mendelian violations found")
long nViolations;
@ -60,47 +60,17 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
@DataPoint(description = "number of child hom variant calls where the parent was hom ref")
long KidHomVar_ParentHomRef;
TrioStructure trio;
double mendelianViolationQualThreshold;
MendelianViolation mv;
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
public static class TrioStructure {
public String mom, dad, child;
}
public static TrioStructure parseTrioDescription(String family) {
Matcher m = FAMILY_PATTERN.matcher(family);
if (m.matches()) {
TrioStructure trio = new TrioStructure();
//System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE);
trio.mom = m.group(1);
trio.dad = m.group(2);
trio.child = m.group(3);
return trio;
} else {
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
}
}
// todo: fix
public void initialize(VariantEvalWalker walker) {
trio = parseTrioDescription(walker.getFamilyStructure());
mendelianViolationQualThreshold = walker.getMendelianViolationQualThreshold();
mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold());
}
public boolean enabled() {
//return getVEWalker().FAMILY_STRUCTURE != null;
return true;
}
private double getQThreshold() {
//return getVEWalker().MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred
return mendelianViolationQualThreshold / 10; // we aren't 10x scaled in the GATK a la phred
//return 0.0;
}
public String getName() {
return "mendelian_violations";
}
@ -111,19 +81,14 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci
Genotype momG = vc.getGenotype(trio.mom);
Genotype dadG = vc.getGenotype(trio.dad);
Genotype childG = vc.getGenotype(trio.child);
if (includeGenotype(momG) && includeGenotype(dadG) && includeGenotype(childG)) {
if (mv.setAlleles(vc)) {
nVariants++;
if (momG == null || dadG == null || childG == null)
throw new IllegalArgumentException(String.format("VariantContext didn't contain genotypes for expected trio members: mom=%s dad=%s child=%s", trio.mom, trio.dad, trio.child));
Genotype momG = vc.getGenotype(mv.getSampleMom());
Genotype dadG = vc.getGenotype(mv.getSampleDad());
Genotype childG = vc.getGenotype(mv.getSampleChild());
// all genotypes are good, so let's see if child is a violation
if (isViolation(vc, momG, dadG, childG)) {
if (mv.isViolation()) {
nViolations++;
String label;
@ -151,6 +116,42 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
return null; // we don't capture any intersting sites
}
/*
private double getQThreshold() {
//return getVEWalker().MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred
return mendelianViolationQualThreshold / 10; // we aren't 10x scaled in the GATK a la phred
//return 0.0;
}
TrioStructure trio;
double mendelianViolationQualThreshold;
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
public static class TrioStructure {
public String mom, dad, child;
}
public static TrioStructure parseTrioDescription(String family) {
Matcher m = FAMILY_PATTERN.matcher(family);
if (m.matches()) {
TrioStructure trio = new TrioStructure();
//System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE);
trio.mom = m.group(1);
trio.dad = m.group(2);
trio.child = m.group(3);
return trio;
} else {
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
}
}
public void initialize(VariantEvalWalker walker) {
trio = parseTrioDescription(walker.getFamilyStructure());
mendelianViolationQualThreshold = walker.getMendelianViolationQualThreshold();
}
private boolean includeGenotype(Genotype g) {
return g.getNegLog10PError() > getQThreshold() && g.isCalled();
}
@ -181,4 +182,9 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
return true;
}
*/
}

View File

@ -70,8 +70,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
@Argument(fullName="discordance", shortName = "disc", doc="Output variants that were not called from a ROD comparison track. Use -disc ROD_NAME", required=false)
private String discordanceRodName = "";
@Deprecated
@Argument(fullName="family", shortName="fam", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
@Argument(fullName="family_structure", shortName="family", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
private String FAMILY_STRUCTURE = "";
@Argument(fullName="mendelianViolation", shortName="mv", doc="output mendelian violation sites only. Sample metadata information will be taken from YAML file (passed with -SM)", required=false)
@ -112,8 +111,10 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
if (MENDELIAN_VIOLATIONS)
mv = new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD);
else if (!FAMILY_STRUCTURE.isEmpty())
else if (!FAMILY_STRUCTURE.isEmpty()) {
mv = new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD);
MENDELIAN_VIOLATIONS = true;
}
// Initialize VCF header
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);

View File

@ -18,7 +18,6 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.MendelianViolationEvaluator;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -58,16 +57,18 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
*********** PRIVATE CLASSES
*/
public class ExtendedTrioStructure extends MendelianViolationEvaluator.TrioStructure {
public class ExtendedTrioStructure {
public String mom, dad, child;
public HashMap<String,HomozygosityRegion> homozygousRegions;
public HashMap<String,Integer> homozygousRegionCounts;
public HashMap<String,MendelianInfoKey> regionKeys;
public org.broadinstitute.sting.utils.MendelianViolation mvObject;
public ExtendedTrioStructure(String family) {
MendelianViolationEvaluator.TrioStructure struct = MendelianViolationEvaluator.parseTrioDescription(family);
this.child = struct.child;
this.mom = struct.mom;
this.dad = struct.dad;
mvObject = new org.broadinstitute.sting.utils.MendelianViolation(family, 0);
this.child = mvObject.getSampleChild();
this.mom = mvObject.getSampleMom();
this.dad = mvObject.getSampleDad();
homozygousRegions = new HashMap<String,HomozygosityRegion>(3);
homozygousRegionCounts = new HashMap<String,Integer>(3);
homozygousRegions.put(child,null);
@ -414,7 +415,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
private MendelianViolation assessViolation(VariantContext varContext, RefMetaDataTracker tracker, ReferenceContext reference, AlignmentContext context) {
MendelianViolation violation;
if ( varContext != null ) {
if ( isComplete(varContext) && MendelianViolationEvaluator.isViolation(varContext,trioStructure) ) {
if ( isComplete(varContext) && trioStructure.mvObject.isViolation(varContext) ) {
if ( isDeNovo(varContext) ) {
violation = assessDeNovo(varContext,tracker,reference,context);
} else if ( isOppositeHomozygote(varContext) ) {

View File

@ -17,6 +17,8 @@ import java.util.regex.Pattern;
*/
public class MendelianViolation {
String sampleMom;
String sampleDad;
String sampleChild;
@ -29,6 +31,23 @@ public class MendelianViolation {
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
public String getSampleMom() {
return sampleMom;
}
public String getSampleDad() {
return sampleDad;
}
public String getSampleChild() {
return sampleChild;
}
public double getMinGenotypeQuality() {
return minGenotypeQuality;
}
/**
*
* @param sampleMomP - sample name of mom
@ -97,10 +116,13 @@ public class MendelianViolation {
/**
* This method prepares the object to evaluate for violation. Typically you won't call it directly, a call to
* isViolation(vc) will take care of this. But if you want to know whether your site was a valid comparison site
* before evaluating it for mendelian violation, you can call setAlleles and then isViolation().
* @param vc - the variant context to extract the genotypes and alleles for mom, dad and child.
* @return false if couldn't find the genotypes or context has empty alleles. True otherwise.
*/
private boolean setAlleles (VariantContext vc)
public boolean setAlleles (VariantContext vc)
{
Genotype gMom = vc.getGenotypes(sampleMom).get(sampleMom);
Genotype gDad = vc.getGenotypes(sampleDad).get(sampleDad);
@ -138,7 +160,7 @@ public class MendelianViolation {
/**
* @return whether or not there is a mendelian violation at the site.
*/
private boolean isViolation() {
public boolean isViolation() {
if (allelesMom.contains(allelesChild.get(0)) && allelesDad.contains(allelesChild.get(1)) ||
allelesMom.contains(allelesChild.get(1)) && allelesDad.contains(allelesChild.get(0)))
return false;