From 73e43d8d2c055057a29cf016c80b609057fa5449 Mon Sep 17 00:00:00 2001 From: carneiro Date: Mon, 7 Mar 2011 19:18:15 +0000 Subject: [PATCH] Added functionality: -disc (--discordance) parameter together with a ROD track will output a VCF with the variants in the ROD track that are not present in the 'variants' VCF. Useful tool to list the variants from hapmap (for example) that weren't called in a dataset. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5392 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/variantutils/SelectVariants.java | 31 +++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 90cb66b36..17b9a57ee 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -42,6 +42,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.vcf.VCFUtils; +import org.omg.PortableInterceptor.DISCARDING; import java.util.*; import java.util.regex.Matcher; @@ -71,11 +72,16 @@ public class SelectVariants extends RodWalker { @Argument(fullName="excludeFiltered", shortName="ef", doc="Don't include filtered loci.", required=false) private boolean EXCLUDE_FILTERED = false; + @Argument(fullName="discordance", shortName = "disc", doc="Output variants that were not called from a ROD comparison track. Use -disc ROD_NAME", required=false) + private String discordanceRodName = ""; + private ArrayList selectNames = new ArrayList(); private List jexls = null; private Set samples = new HashSet(); + private boolean DISCORDANCE_ONLY = false; + /** * Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher */ @@ -87,11 +93,12 @@ public class SelectVariants extends RodWalker { Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); + DISCORDANCE_ONLY = discordanceRodName.length() > 0; samples = SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS); - for (String sample : samples) { logger.info("Including sample '" + sample + "'"); } + if (DISCORDANCE_ONLY) logger.info("Selecting only variants discordant with the track: " + discordanceRodName); // Initialize VCF header Set headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger); @@ -121,13 +128,33 @@ public class SelectVariants extends RodWalker { return 0; Collection vcs = tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false); + + // If a discordance rod name was provided, we select the variants present in the discordance that + // are not present in the variant. + if (DISCORDANCE_ONLY) { + if (vcs == null || vcs.size() == 0) { + VariantContext compVC = tracker.getVariantContext(ref, discordanceRodName, context.getLocation()); + if (compVC != null) { + // if the genotype of any of the samples we're looking it is HET or HomVar, we write this variant + for (String sample : samples) { + Genotype g = compVC.getGenotype(sample); + if ( g.isHet() || g.isHomVar() ) { + vcfWriter.add(compVC, ref.getBase()); + return 1; + } + } + } + } + return 0; + } + if ( vcs == null || vcs.size() == 0) { return 0; } for (VariantContext vc : vcs) { VariantContext sub = subsetRecord(vc, samples); - + if ( (sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) { //System.out.printf("%s%n",sub.toString()); for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) {