Adding in likelihood calculations for mendelian violations. Also fixing a minor and rare bug in SelectVariants when specifying family structure on the command line.
This commit is contained in:
parent
034b868588
commit
3b51d9106a
|
|
@ -162,6 +162,12 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
||||||
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
|
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
|
||||||
protected boolean indelsOnly = false;
|
protected boolean indelsOnly = false;
|
||||||
|
|
||||||
|
@Argument(fullName="family_string",shortName="family",required=false,doc="A family string of the form mom+dad=child for use with the mendelian violation ratio annotation")
|
||||||
|
public String familyStr = null;
|
||||||
|
|
||||||
|
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio")
|
||||||
|
public double minGenotypeQualityP = 0.0;
|
||||||
|
|
||||||
private VariantAnnotatorEngine engine;
|
private VariantAnnotatorEngine engine;
|
||||||
|
|
||||||
private Collection<VariantContext> indelBufferContext;
|
private Collection<VariantContext> indelBufferContext;
|
||||||
|
|
|
||||||
|
|
@ -452,7 +452,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); }
|
throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); }
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
mvSet.add(new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
||||||
}
|
}
|
||||||
else if (!FAMILY_STRUCTURE.isEmpty()) {
|
else if (!FAMILY_STRUCTURE.isEmpty()) {
|
||||||
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,13 @@
|
||||||
package org.broadinstitute.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
|
|
@ -32,6 +34,9 @@ public class MendelianViolation {
|
||||||
|
|
||||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||||
|
|
||||||
|
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
|
||||||
|
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
|
||||||
|
|
||||||
|
|
||||||
public String getSampleMom() {
|
public String getSampleMom() {
|
||||||
return sampleMom;
|
return sampleMom;
|
||||||
|
|
@ -168,4 +173,41 @@ public class MendelianViolation {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the likelihood ratio for a mendelian violation
|
||||||
|
*/
|
||||||
|
public double violationLikelihoodRatio(VariantContext vc) {
|
||||||
|
double[] logLikAssignments = new double[27];
|
||||||
|
// the matrix to set up is
|
||||||
|
// MOM DAD CHILD
|
||||||
|
// |- AA
|
||||||
|
// AA AA | AB
|
||||||
|
// |- BB
|
||||||
|
// |- AA
|
||||||
|
// AA AB | AB
|
||||||
|
// |- BB
|
||||||
|
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
|
||||||
|
double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector();
|
||||||
|
double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector();
|
||||||
|
double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector();
|
||||||
|
int offset = 0;
|
||||||
|
for ( int oMom = 0; oMom < 3; oMom++ ) {
|
||||||
|
for ( int oDad = 0; oDad < 3; oDad++ ) {
|
||||||
|
for ( int oChild = 0; oChild < 3; oChild ++ ) {
|
||||||
|
logLikAssignments[offset++] = momGL[oMom] + dadGL[oDad] + childGL[oChild];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
double[] mvLiks = new double[12];
|
||||||
|
double[] nonMVLiks = new double[15];
|
||||||
|
for ( int i = 0; i < 12; i ++ ) {
|
||||||
|
mvLiks[i] = logLikAssignments[mvOffsets[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( int i = 0; i < 15; i++) {
|
||||||
|
nonMVLiks[i] = logLikAssignments[nonMVOffsets[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
return MathUtils.log10sumLog10(mvLiks) - MathUtils.log10sumLog10(nonMVLiks);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue