SelectVariants: now keeps the YAML support internal. It is still there if you want to use it, but it is no longer published. The official parameter is the family-structure string for now.
VariantEval: now uses the new MendelianViolation utility class. MendelianViolationClassifier: I noticed that I had broken chartl's walker by changing VariantEval, so I took the liberty of modifying it to use the new library too. I kept the modifications to a minimum; a full integration would be possible if this turns out to be a useful tool, but since it lives in oneoffs, I decided not to go all out. MendelianViolation: some getter methods were added for chartl's walker and VariantEval. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5447 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
653fb09bb7
commit
4b9b767eb1
|
|
@ -9,6 +9,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||||
|
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
@ -47,7 +48,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
||||||
|
|
||||||
@DataPoint(description = "Number of mendelian variants found")
|
@DataPoint(description = "Number of mendelian variants found")
|
||||||
long nVariants;
|
long nVariants;
|
||||||
|
|
||||||
@DataPoint(description = "Number of mendelian violations found")
|
@DataPoint(description = "Number of mendelian violations found")
|
||||||
long nViolations;
|
long nViolations;
|
||||||
|
|
||||||
|
|
@ -60,47 +60,17 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
||||||
@DataPoint(description = "number of child hom variant calls where the parent was hom ref")
|
@DataPoint(description = "number of child hom variant calls where the parent was hom ref")
|
||||||
long KidHomVar_ParentHomRef;
|
long KidHomVar_ParentHomRef;
|
||||||
|
|
||||||
TrioStructure trio;
|
MendelianViolation mv;
|
||||||
double mendelianViolationQualThreshold;
|
|
||||||
|
|
||||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
|
||||||
|
|
||||||
public static class TrioStructure {
|
|
||||||
public String mom, dad, child;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static TrioStructure parseTrioDescription(String family) {
|
|
||||||
Matcher m = FAMILY_PATTERN.matcher(family);
|
|
||||||
if (m.matches()) {
|
|
||||||
TrioStructure trio = new TrioStructure();
|
|
||||||
//System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE);
|
|
||||||
trio.mom = m.group(1);
|
|
||||||
trio.dad = m.group(2);
|
|
||||||
trio.child = m.group(3);
|
|
||||||
return trio;
|
|
||||||
} else {
|
|
||||||
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// todo: fix
|
|
||||||
public void initialize(VariantEvalWalker walker) {
|
public void initialize(VariantEvalWalker walker) {
|
||||||
trio = parseTrioDescription(walker.getFamilyStructure());
|
mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold());
|
||||||
mendelianViolationQualThreshold = walker.getMendelianViolationQualThreshold();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public boolean enabled() {
|
public boolean enabled() {
|
||||||
//return getVEWalker().FAMILY_STRUCTURE != null;
|
//return getVEWalker().FAMILY_STRUCTURE != null;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private double getQThreshold() {
|
|
||||||
//return getVEWalker().MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred
|
|
||||||
return mendelianViolationQualThreshold / 10; // we aren't 10x scaled in the GATK a la phred
|
|
||||||
//return 0.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getName() {
|
public String getName() {
|
||||||
return "mendelian_violations";
|
return "mendelian_violations";
|
||||||
}
|
}
|
||||||
|
|
@ -111,19 +81,14 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
||||||
|
|
||||||
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci
|
if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci
|
||||||
Genotype momG = vc.getGenotype(trio.mom);
|
if (mv.setAlleles(vc)) {
|
||||||
Genotype dadG = vc.getGenotype(trio.dad);
|
|
||||||
Genotype childG = vc.getGenotype(trio.child);
|
|
||||||
|
|
||||||
if (includeGenotype(momG) && includeGenotype(dadG) && includeGenotype(childG)) {
|
|
||||||
nVariants++;
|
nVariants++;
|
||||||
|
|
||||||
if (momG == null || dadG == null || childG == null)
|
Genotype momG = vc.getGenotype(mv.getSampleMom());
|
||||||
throw new IllegalArgumentException(String.format("VariantContext didn't contain genotypes for expected trio members: mom=%s dad=%s child=%s", trio.mom, trio.dad, trio.child));
|
Genotype dadG = vc.getGenotype(mv.getSampleDad());
|
||||||
|
Genotype childG = vc.getGenotype(mv.getSampleChild());
|
||||||
|
|
||||||
// all genotypes are good, so let's see if child is a violation
|
if (mv.isViolation()) {
|
||||||
|
|
||||||
if (isViolation(vc, momG, dadG, childG)) {
|
|
||||||
nViolations++;
|
nViolations++;
|
||||||
|
|
||||||
String label;
|
String label;
|
||||||
|
|
@ -151,6 +116,42 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
||||||
return null; // we don't capture any intersting sites
|
return null; // we don't capture any intersting sites
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
private double getQThreshold() {
|
||||||
|
//return getVEWalker().MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred
|
||||||
|
return mendelianViolationQualThreshold / 10; // we aren't 10x scaled in the GATK a la phred
|
||||||
|
//return 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
TrioStructure trio;
|
||||||
|
double mendelianViolationQualThreshold;
|
||||||
|
|
||||||
|
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||||
|
|
||||||
|
public static class TrioStructure {
|
||||||
|
public String mom, dad, child;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static TrioStructure parseTrioDescription(String family) {
|
||||||
|
Matcher m = FAMILY_PATTERN.matcher(family);
|
||||||
|
if (m.matches()) {
|
||||||
|
TrioStructure trio = new TrioStructure();
|
||||||
|
//System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE);
|
||||||
|
trio.mom = m.group(1);
|
||||||
|
trio.dad = m.group(2);
|
||||||
|
trio.child = m.group(3);
|
||||||
|
return trio;
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void initialize(VariantEvalWalker walker) {
|
||||||
|
trio = parseTrioDescription(walker.getFamilyStructure());
|
||||||
|
mendelianViolationQualThreshold = walker.getMendelianViolationQualThreshold();
|
||||||
|
}
|
||||||
|
|
||||||
private boolean includeGenotype(Genotype g) {
|
private boolean includeGenotype(Genotype g) {
|
||||||
return g.getNegLog10PError() > getQThreshold() && g.isCalled();
|
return g.getNegLog10PError() > getQThreshold() && g.isCalled();
|
||||||
}
|
}
|
||||||
|
|
@ -181,4 +182,9 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -70,8 +70,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
@Argument(fullName="discordance", shortName = "disc", doc="Output variants that were not called from a ROD comparison track. Use -disc ROD_NAME", required=false)
|
@Argument(fullName="discordance", shortName = "disc", doc="Output variants that were not called from a ROD comparison track. Use -disc ROD_NAME", required=false)
|
||||||
private String discordanceRodName = "";
|
private String discordanceRodName = "";
|
||||||
|
|
||||||
@Deprecated
|
@Argument(fullName="family_structure", shortName="family", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
|
||||||
@Argument(fullName="family", shortName="fam", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
|
|
||||||
private String FAMILY_STRUCTURE = "";
|
private String FAMILY_STRUCTURE = "";
|
||||||
|
|
||||||
@Argument(fullName="mendelianViolation", shortName="mv", doc="output mendelian violation sites only. Sample metadata information will be taken from YAML file (passed with -SM)", required=false)
|
@Argument(fullName="mendelianViolation", shortName="mv", doc="output mendelian violation sites only. Sample metadata information will be taken from YAML file (passed with -SM)", required=false)
|
||||||
|
|
@ -112,8 +111,10 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
if (MENDELIAN_VIOLATIONS)
|
if (MENDELIAN_VIOLATIONS)
|
||||||
mv = new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD);
|
mv = new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD);
|
||||||
else if (!FAMILY_STRUCTURE.isEmpty())
|
else if (!FAMILY_STRUCTURE.isEmpty()) {
|
||||||
mv = new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD);
|
mv = new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD);
|
||||||
|
MENDELIAN_VIOLATIONS = true;
|
||||||
|
}
|
||||||
|
|
||||||
// Initialize VCF header
|
// Initialize VCF header
|
||||||
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
|
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,6 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
||||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.MendelianViolationEvaluator;
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
@ -58,16 +57,18 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
||||||
*********** PRIVATE CLASSES
|
*********** PRIVATE CLASSES
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class ExtendedTrioStructure extends MendelianViolationEvaluator.TrioStructure {
|
public class ExtendedTrioStructure {
|
||||||
|
public String mom, dad, child;
|
||||||
public HashMap<String,HomozygosityRegion> homozygousRegions;
|
public HashMap<String,HomozygosityRegion> homozygousRegions;
|
||||||
public HashMap<String,Integer> homozygousRegionCounts;
|
public HashMap<String,Integer> homozygousRegionCounts;
|
||||||
public HashMap<String,MendelianInfoKey> regionKeys;
|
public HashMap<String,MendelianInfoKey> regionKeys;
|
||||||
|
public org.broadinstitute.sting.utils.MendelianViolation mvObject;
|
||||||
|
|
||||||
public ExtendedTrioStructure(String family) {
|
public ExtendedTrioStructure(String family) {
|
||||||
MendelianViolationEvaluator.TrioStructure struct = MendelianViolationEvaluator.parseTrioDescription(family);
|
mvObject = new org.broadinstitute.sting.utils.MendelianViolation(family, 0);
|
||||||
this.child = struct.child;
|
this.child = mvObject.getSampleChild();
|
||||||
this.mom = struct.mom;
|
this.mom = mvObject.getSampleMom();
|
||||||
this.dad = struct.dad;
|
this.dad = mvObject.getSampleDad();
|
||||||
homozygousRegions = new HashMap<String,HomozygosityRegion>(3);
|
homozygousRegions = new HashMap<String,HomozygosityRegion>(3);
|
||||||
homozygousRegionCounts = new HashMap<String,Integer>(3);
|
homozygousRegionCounts = new HashMap<String,Integer>(3);
|
||||||
homozygousRegions.put(child,null);
|
homozygousRegions.put(child,null);
|
||||||
|
|
@ -414,7 +415,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
|
||||||
private MendelianViolation assessViolation(VariantContext varContext, RefMetaDataTracker tracker, ReferenceContext reference, AlignmentContext context) {
|
private MendelianViolation assessViolation(VariantContext varContext, RefMetaDataTracker tracker, ReferenceContext reference, AlignmentContext context) {
|
||||||
MendelianViolation violation;
|
MendelianViolation violation;
|
||||||
if ( varContext != null ) {
|
if ( varContext != null ) {
|
||||||
if ( isComplete(varContext) && MendelianViolationEvaluator.isViolation(varContext,trioStructure) ) {
|
if ( isComplete(varContext) && trioStructure.mvObject.isViolation(varContext) ) {
|
||||||
if ( isDeNovo(varContext) ) {
|
if ( isDeNovo(varContext) ) {
|
||||||
violation = assessDeNovo(varContext,tracker,reference,context);
|
violation = assessDeNovo(varContext,tracker,reference,context);
|
||||||
} else if ( isOppositeHomozygote(varContext) ) {
|
} else if ( isOppositeHomozygote(varContext) ) {
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,8 @@ import java.util.regex.Pattern;
|
||||||
*/
|
*/
|
||||||
public class MendelianViolation {
|
public class MendelianViolation {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
String sampleMom;
|
String sampleMom;
|
||||||
String sampleDad;
|
String sampleDad;
|
||||||
String sampleChild;
|
String sampleChild;
|
||||||
|
|
@ -29,6 +31,23 @@ public class MendelianViolation {
|
||||||
|
|
||||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||||
|
|
||||||
|
|
||||||
|
public String getSampleMom() {
|
||||||
|
return sampleMom;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSampleDad() {
|
||||||
|
return sampleDad;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSampleChild() {
|
||||||
|
return sampleChild;
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getMinGenotypeQuality() {
|
||||||
|
return minGenotypeQuality;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @param sampleMomP - sample name of mom
|
* @param sampleMomP - sample name of mom
|
||||||
|
|
@ -97,10 +116,13 @@ public class MendelianViolation {
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* This method prepares the object to evaluate for violation. Typically you won't call it directly, a call to
|
||||||
|
* isViolation(vc) will take care of this. But if you want to know whether your site was a valid comparison site
|
||||||
|
* before evaluating it for mendelian violation, you can call setAlleles and then isViolation().
|
||||||
* @param vc - the variant context to extract the genotypes and alleles for mom, dad and child.
|
* @param vc - the variant context to extract the genotypes and alleles for mom, dad and child.
|
||||||
* @return false if couldn't find the genotypes or context has empty alleles. True otherwise.
|
* @return false if couldn't find the genotypes or context has empty alleles. True otherwise.
|
||||||
*/
|
*/
|
||||||
private boolean setAlleles (VariantContext vc)
|
public boolean setAlleles (VariantContext vc)
|
||||||
{
|
{
|
||||||
Genotype gMom = vc.getGenotypes(sampleMom).get(sampleMom);
|
Genotype gMom = vc.getGenotypes(sampleMom).get(sampleMom);
|
||||||
Genotype gDad = vc.getGenotypes(sampleDad).get(sampleDad);
|
Genotype gDad = vc.getGenotypes(sampleDad).get(sampleDad);
|
||||||
|
|
@ -138,7 +160,7 @@ public class MendelianViolation {
|
||||||
/**
|
/**
|
||||||
* @return whether or not there is a mendelian violation at the site.
|
* @return whether or not there is a mendelian violation at the site.
|
||||||
*/
|
*/
|
||||||
private boolean isViolation() {
|
public boolean isViolation() {
|
||||||
if (allelesMom.contains(allelesChild.get(0)) && allelesDad.contains(allelesChild.get(1)) ||
|
if (allelesMom.contains(allelesChild.get(0)) && allelesDad.contains(allelesChild.get(1)) ||
|
||||||
allelesMom.contains(allelesChild.get(1)) && allelesDad.contains(allelesChild.get(0)))
|
allelesMom.contains(allelesChild.get(1)) && allelesDad.contains(allelesChild.get(0)))
|
||||||
return false;
|
return false;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue