SelectVariants: added parameters for mendelian violation. Given a trio vcf, it will generate a VCF with the sites that are mendelian violations.
GenotypeAndValidate: now annotates the validations with callStatus. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5425 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b03055099a
commit
4a84a81d17
|
|
@ -25,6 +25,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||||
|
|
||||||
import org.broad.tribble.util.variantcontext.Genotype;
|
import org.broad.tribble.util.variantcontext.Genotype;
|
||||||
|
import org.broad.tribble.util.variantcontext.MendelianViolation;
|
||||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
import org.broad.tribble.vcf.VCFConstants;
|
import org.broad.tribble.vcf.VCFConstants;
|
||||||
import org.broad.tribble.vcf.VCFHeader;
|
import org.broad.tribble.vcf.VCFHeader;
|
||||||
|
|
@ -40,15 +41,9 @@ import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
|
||||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||||
import org.omg.PortableInterceptor.DISCARDING;
|
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Takes a VCF file, selects variants based on sample(s) in which it was found and/or on various annotation criteria,
|
* Takes a VCF file, selects variants based on sample(s) in which it was found and/or on various annotation criteria,
|
||||||
|
|
@ -75,6 +70,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
@Argument(fullName="discordance", shortName = "disc", doc="Output variants that were not called from a ROD comparison track. Use -disc ROD_NAME", required=false)
|
@Argument(fullName="discordance", shortName = "disc", doc="Output variants that were not called from a ROD comparison track. Use -disc ROD_NAME", required=false)
|
||||||
private String discordanceRodName = "";
|
private String discordanceRodName = "";
|
||||||
|
|
||||||
|
@Argument(fullName="family", shortName="fam", doc="If provided, genotypes in will be examined for mendelian violations: this argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
|
||||||
|
private String FAMILY_STRUCTURE = "";
|
||||||
|
|
||||||
|
@Argument(fullName="mendelian_violation_quality", shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
|
||||||
|
private double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 0;
|
||||||
|
|
||||||
|
|
||||||
private ArrayList<String> selectNames = new ArrayList<String>();
|
private ArrayList<String> selectNames = new ArrayList<String>();
|
||||||
private List<VariantContextUtils.JexlVCMatchExp> jexls = null;
|
private List<VariantContextUtils.JexlVCMatchExp> jexls = null;
|
||||||
|
|
||||||
|
|
@ -82,6 +84,10 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
private boolean DISCORDANCE_ONLY = false;
|
private boolean DISCORDANCE_ONLY = false;
|
||||||
|
|
||||||
|
private MendelianViolation mv;
|
||||||
|
private boolean MENDELIAN_VIOLATIONS = false;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
||||||
*/
|
*/
|
||||||
|
|
@ -93,13 +99,19 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
|
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
|
||||||
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||||
|
|
||||||
DISCORDANCE_ONLY = discordanceRodName.length() > 0;
|
|
||||||
samples = SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS);
|
samples = SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS);
|
||||||
for (String sample : samples) {
|
for (String sample : samples) {
|
||||||
logger.info("Including sample '" + sample + "'");
|
logger.info("Including sample '" + sample + "'");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DISCORDANCE_ONLY = discordanceRodName.length() > 0;
|
||||||
if (DISCORDANCE_ONLY) logger.info("Selecting only variants discordant with the track: " + discordanceRodName);
|
if (DISCORDANCE_ONLY) logger.info("Selecting only variants discordant with the track: " + discordanceRodName);
|
||||||
|
|
||||||
|
if (!FAMILY_STRUCTURE.isEmpty()) {
|
||||||
|
mv = new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD);
|
||||||
|
MENDELIAN_VIOLATIONS = true;
|
||||||
|
}
|
||||||
|
|
||||||
// Initialize VCF header
|
// Initialize VCF header
|
||||||
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
|
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
|
||||||
headerLines.add(new VCFHeaderLine("source", "SelectVariants"));
|
headerLines.add(new VCFHeaderLine("source", "SelectVariants"));
|
||||||
|
|
@ -145,25 +157,31 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( vcs == null || vcs.size() == 0) {
|
if ( vcs == null || vcs.size() == 0) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
for (VariantContext vc : vcs) {
|
for (VariantContext vc : vcs) {
|
||||||
VariantContext sub = subsetRecord(vc, samples);
|
if (MENDELIAN_VIOLATIONS) {
|
||||||
|
if (mv.isViolation(vc)) {
|
||||||
if ( (sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) {
|
vcfWriter.add(vc, ref.getBase());
|
||||||
//System.out.printf("%s%n",sub.toString());
|
|
||||||
for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) {
|
|
||||||
if ( !VariantContextUtils.match(sub, jexl) ) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
vcfWriter.add(sub, ref.getBase());
|
else {
|
||||||
|
VariantContext sub = subsetRecord(vc, samples);
|
||||||
|
if ( (sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) {
|
||||||
|
//System.out.printf("%s%n",sub.toString());
|
||||||
|
for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) {
|
||||||
|
if ( !VariantContextUtils.match(sub, jexl) ) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
vcfWriter.add(sub, ref.getBase());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@
|
||||||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||||
|
|
||||||
import com.google.common.collect.ImmutableSet;
|
import com.google.common.collect.ImmutableSet;
|
||||||
|
import org.broad.tribble.util.variantcontext.MutableVariantContext;
|
||||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
import org.broad.tribble.vcf.VCFHeader;
|
import org.broad.tribble.vcf.VCFHeader;
|
||||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||||
|
|
@ -45,6 +46,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||||
|
import sun.management.counter.Variability;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -199,7 +201,13 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
||||||
else
|
else
|
||||||
counter.numFP = 1L;
|
counter.numFP = 1L;
|
||||||
}
|
}
|
||||||
vcfWriter.add(vcComp, ref.getBase());
|
if (!vcComp.hasAttribute("callStatus")) {
|
||||||
|
MutableVariantContext mvc = new MutableVariantContext(vcComp);
|
||||||
|
mvc.putAttribute("callStatus", call.confidentlyCalled ? "confident" : "notConfident" );
|
||||||
|
vcfWriter.add(mvc, ref.getBase());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
vcfWriter.add(vcComp, ref.getBase());
|
||||||
return counter;
|
return counter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -224,7 +232,7 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
||||||
logger.info("FP = " + reduceSum.numFP);
|
logger.info("FP = " + reduceSum.numFP);
|
||||||
logger.info("FN = " + reduceSum.numFN);
|
logger.info("FN = " + reduceSum.numFN);
|
||||||
logger.info("PPV = " + ((double) reduceSum.numTP /( reduceSum.numTP + reduceSum.numFP)));
|
logger.info("PPV = " + ((double) reduceSum.numTP /( reduceSum.numTP + reduceSum.numFP)));
|
||||||
logger.info("FPV = " + ((double) reduceSum.numTN /( reduceSum.numTN + reduceSum.numFN)));
|
logger.info("NPV = " + ((double) reduceSum.numTN /( reduceSum.numTN + reduceSum.numFN)));
|
||||||
logger.info("Uncovered = " + reduceSum.numUncovered);
|
logger.info("Uncovered = " + reduceSum.numUncovered);
|
||||||
logger.info("confidently called = " + reduceSum.numConfidentCalls);
|
logger.info("confidently called = " + reduceSum.numConfidentCalls);
|
||||||
logger.info("not confidently called = " + reduceSum.numNotConfidentCalls );
|
logger.info("not confidently called = " + reduceSum.numNotConfidentCalls );
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue