Added functionality:
-disc (--discordance) parameter together with a ROD track will output a VCF with the variants in the ROD track that are not present in the 'variants' VCF. Useful tool to list the variants from hapmap (for example) that weren't called in a dataset. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5392 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
bf2e02f472
commit
73e43d8d2c
|
|
@ -42,6 +42,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||||
|
import org.omg.PortableInterceptor.DISCARDING;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
|
|
@ -71,11 +72,16 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
@Argument(fullName="excludeFiltered", shortName="ef", doc="Don't include filtered loci.", required=false)
|
@Argument(fullName="excludeFiltered", shortName="ef", doc="Don't include filtered loci.", required=false)
|
||||||
private boolean EXCLUDE_FILTERED = false;
|
private boolean EXCLUDE_FILTERED = false;
|
||||||
|
|
||||||
|
@Argument(fullName="discordance", shortName = "disc", doc="Output variants that were not called from a ROD comparison track. Use -disc ROD_NAME", required=false)
|
||||||
|
private String discordanceRodName = "";
|
||||||
|
|
||||||
private ArrayList<String> selectNames = new ArrayList<String>();
|
private ArrayList<String> selectNames = new ArrayList<String>();
|
||||||
private List<VariantContextUtils.JexlVCMatchExp> jexls = null;
|
private List<VariantContextUtils.JexlVCMatchExp> jexls = null;
|
||||||
|
|
||||||
private Set<String> samples = new HashSet<String>();
|
private Set<String> samples = new HashSet<String>();
|
||||||
|
|
||||||
|
private boolean DISCORDANCE_ONLY = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
||||||
*/
|
*/
|
||||||
|
|
@ -87,11 +93,12 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
|
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
|
||||||
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||||
|
|
||||||
|
DISCORDANCE_ONLY = discordanceRodName.length() > 0;
|
||||||
samples = SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS);
|
samples = SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS);
|
||||||
|
|
||||||
for (String sample : samples) {
|
for (String sample : samples) {
|
||||||
logger.info("Including sample '" + sample + "'");
|
logger.info("Including sample '" + sample + "'");
|
||||||
}
|
}
|
||||||
|
if (DISCORDANCE_ONLY) logger.info("Selecting only variants discordant with the track: " + discordanceRodName);
|
||||||
|
|
||||||
// Initialize VCF header
|
// Initialize VCF header
|
||||||
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
|
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
|
||||||
|
|
@ -121,13 +128,33 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
Collection<VariantContext> vcs = tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false);
|
Collection<VariantContext> vcs = tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false);
|
||||||
|
|
||||||
|
// If a discordance rod name was provided, we select the variants present in the discordance that
|
||||||
|
// are not present in the variant.
|
||||||
|
if (DISCORDANCE_ONLY) {
|
||||||
|
if (vcs == null || vcs.size() == 0) {
|
||||||
|
VariantContext compVC = tracker.getVariantContext(ref, discordanceRodName, context.getLocation());
|
||||||
|
if (compVC != null) {
|
||||||
|
// if the genotype of any of the samples we're looking it is HET or HomVar, we write this variant
|
||||||
|
for (String sample : samples) {
|
||||||
|
Genotype g = compVC.getGenotype(sample);
|
||||||
|
if ( g.isHet() || g.isHomVar() ) {
|
||||||
|
vcfWriter.add(compVC, ref.getBase());
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
if ( vcs == null || vcs.size() == 0) {
|
if ( vcs == null || vcs.size() == 0) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (VariantContext vc : vcs) {
|
for (VariantContext vc : vcs) {
|
||||||
VariantContext sub = subsetRecord(vc, samples);
|
VariantContext sub = subsetRecord(vc, samples);
|
||||||
|
|
||||||
if ( (sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) {
|
if ( (sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) {
|
||||||
//System.out.printf("%s%n",sub.toString());
|
//System.out.printf("%s%n",sub.toString());
|
||||||
for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) {
|
for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue