diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 17b9a57ee..b4725b81b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.MendelianViolation; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFConstants; import org.broad.tribble.vcf.VCFHeader; @@ -40,15 +41,9 @@ import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.vcf.VCFUtils; -import org.omg.PortableInterceptor.DISCARDING; import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.io.File; -import java.io.FileNotFoundException; /** * Takes a VCF file, selects variants based on sample(s) in which it was found and/or on various annotation criteria, @@ -75,6 +70,13 @@ public class SelectVariants extends RodWalker { @Argument(fullName="discordance", shortName = "disc", doc="Output variants that were not called from a ROD comparison track. Use -disc ROD_NAME", required=false) private String discordanceRodName = ""; + @Argument(fullName="family", shortName="fam", doc="If provided, genotypes in will be examined for mendelian violations: this argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) + private String FAMILY_STRUCTURE = ""; + + @Argument(fullName="mendelian_violation_quality", shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false) + private double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 0; + + private ArrayList selectNames = new ArrayList(); private List jexls = null; @@ -82,6 +84,10 @@ public class SelectVariants extends RodWalker { private boolean DISCORDANCE_ONLY = false; + private MendelianViolation mv; + private boolean MENDELIAN_VIOLATIONS = false; + + /** * Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher */ @@ -93,13 +99,19 @@ public class SelectVariants extends RodWalker { Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); - DISCORDANCE_ONLY = discordanceRodName.length() > 0; samples = SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS); for (String sample : samples) { logger.info("Including sample '" + sample + "'"); } + + DISCORDANCE_ONLY = discordanceRodName.length() > 0; if (DISCORDANCE_ONLY) logger.info("Selecting only variants discordant with the track: " + discordanceRodName); + if (!FAMILY_STRUCTURE.isEmpty()) { + mv = new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD); + MENDELIAN_VIOLATIONS = true; + } + // Initialize VCF header Set headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger); headerLines.add(new VCFHeaderLine("source", "SelectVariants")); @@ -145,25 +157,31 @@ public class SelectVariants extends RodWalker { } } } - return 0; } if ( vcs == null || vcs.size() == 0) { return 0; } + for (VariantContext vc : vcs) { - VariantContext sub = subsetRecord(vc, samples); - - if ( (sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) { - //System.out.printf("%s%n",sub.toString()); - for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) { - if ( !VariantContextUtils.match(sub, jexl) ) { - return 0; - } + if (MENDELIAN_VIOLATIONS) { + if (mv.isViolation(vc)) { + vcfWriter.add(vc, ref.getBase()); } + } - vcfWriter.add(sub, ref.getBase()); + else { + VariantContext sub = subsetRecord(vc, samples); + if ( (sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) { + //System.out.printf("%s%n",sub.toString()); + for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) { + if ( !VariantContextUtils.match(sub, jexl) ) { + return 0; + } + } + vcfWriter.add(sub, ref.getBase()); + } } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/GenotypeAndValidateWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/GenotypeAndValidateWalker.java index 16afd8912..f54c602c6 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/GenotypeAndValidateWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/GenotypeAndValidateWalker.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers; import com.google.common.collect.ImmutableSet; +import org.broad.tribble.util.variantcontext.MutableVariantContext; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeader; import org.broad.tribble.vcf.VCFHeaderLine; @@ -45,6 +46,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.vcf.VCFUtils; +import sun.management.counter.Variability; import java.util.*; @@ -199,7 +201,13 @@ public class GenotypeAndValidateWalker extends RodWalker