SelectVariants: added parameters for mendelian violation. Given a trio vcf, it will generate a VCF with the sites that are mendelian violations.
GenotypeAndValidate: now annotates the validations with callStatus. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5425 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b03055099a
commit
4a84a81d17
|
|
@ -25,6 +25,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.MendelianViolation;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
|
|
@ -40,15 +41,9 @@ import org.broadinstitute.sting.gatk.walkers.RMD;
|
|||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
import org.omg.PortableInterceptor.DISCARDING;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
/**
|
||||
* Takes a VCF file, selects variants based on sample(s) in which it was found and/or on various annotation criteria,
|
||||
|
|
@ -75,6 +70,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="discordance", shortName = "disc", doc="Output variants that were not called from a ROD comparison track. Use -disc ROD_NAME", required=false)
|
||||
private String discordanceRodName = "";
|
||||
|
||||
@Argument(fullName="family", shortName="fam", doc="If provided, genotypes in will be examined for mendelian violations: this argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
|
||||
private String FAMILY_STRUCTURE = "";
|
||||
|
||||
@Argument(fullName="mendelian_violation_quality", shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
|
||||
private double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 0;
|
||||
|
||||
|
||||
private ArrayList<String> selectNames = new ArrayList<String>();
|
||||
private List<VariantContextUtils.JexlVCMatchExp> jexls = null;
|
||||
|
||||
|
|
@ -82,6 +84,10 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
private boolean DISCORDANCE_ONLY = false;
|
||||
|
||||
private MendelianViolation mv;
|
||||
private boolean MENDELIAN_VIOLATIONS = false;
|
||||
|
||||
|
||||
/**
|
||||
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
||||
*/
|
||||
|
|
@ -93,13 +99,19 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
|
||||
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
|
||||
DISCORDANCE_ONLY = discordanceRodName.length() > 0;
|
||||
samples = SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS);
|
||||
for (String sample : samples) {
|
||||
logger.info("Including sample '" + sample + "'");
|
||||
}
|
||||
|
||||
DISCORDANCE_ONLY = discordanceRodName.length() > 0;
|
||||
if (DISCORDANCE_ONLY) logger.info("Selecting only variants discordant with the track: " + discordanceRodName);
|
||||
|
||||
if (!FAMILY_STRUCTURE.isEmpty()) {
|
||||
mv = new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD);
|
||||
MENDELIAN_VIOLATIONS = true;
|
||||
}
|
||||
|
||||
// Initialize VCF header
|
||||
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
|
||||
headerLines.add(new VCFHeaderLine("source", "SelectVariants"));
|
||||
|
|
@ -145,25 +157,31 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ( vcs == null || vcs.size() == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
for (VariantContext vc : vcs) {
|
||||
VariantContext sub = subsetRecord(vc, samples);
|
||||
|
||||
if ( (sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) {
|
||||
//System.out.printf("%s%n",sub.toString());
|
||||
for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) {
|
||||
if ( !VariantContextUtils.match(sub, jexl) ) {
|
||||
return 0;
|
||||
}
|
||||
if (MENDELIAN_VIOLATIONS) {
|
||||
if (mv.isViolation(vc)) {
|
||||
vcfWriter.add(vc, ref.getBase());
|
||||
}
|
||||
}
|
||||
|
||||
vcfWriter.add(sub, ref.getBase());
|
||||
else {
|
||||
VariantContext sub = subsetRecord(vc, samples);
|
||||
if ( (sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) {
|
||||
//System.out.printf("%s%n",sub.toString());
|
||||
for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) {
|
||||
if ( !VariantContextUtils.match(sub, jexl) ) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
vcfWriter.add(sub, ref.getBase());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import org.broad.tribble.util.variantcontext.MutableVariantContext;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
|
|
@ -45,6 +46,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
|||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
import sun.management.counter.Variability;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -199,7 +201,13 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
|||
else
|
||||
counter.numFP = 1L;
|
||||
}
|
||||
vcfWriter.add(vcComp, ref.getBase());
|
||||
if (!vcComp.hasAttribute("callStatus")) {
|
||||
MutableVariantContext mvc = new MutableVariantContext(vcComp);
|
||||
mvc.putAttribute("callStatus", call.confidentlyCalled ? "confident" : "notConfident" );
|
||||
vcfWriter.add(mvc, ref.getBase());
|
||||
}
|
||||
else
|
||||
vcfWriter.add(vcComp, ref.getBase());
|
||||
return counter;
|
||||
}
|
||||
|
||||
|
|
@ -224,7 +232,7 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
|||
logger.info("FP = " + reduceSum.numFP);
|
||||
logger.info("FN = " + reduceSum.numFN);
|
||||
logger.info("PPV = " + ((double) reduceSum.numTP /( reduceSum.numTP + reduceSum.numFP)));
|
||||
logger.info("FPV = " + ((double) reduceSum.numTN /( reduceSum.numTN + reduceSum.numFN)));
|
||||
logger.info("NPV = " + ((double) reduceSum.numTN /( reduceSum.numTN + reduceSum.numFN)));
|
||||
logger.info("Uncovered = " + reduceSum.numUncovered);
|
||||
logger.info("confidently called = " + reduceSum.numConfidentCalls);
|
||||
logger.info("not confidently called = " + reduceSum.numNotConfidentCalls );
|
||||
|
|
|
|||
Loading…
Reference in New Issue