Added --concordance option that outputs the intersection between two VCF files. Useful to see what calls were made in both technologies/algorithms.

Wiki has been updated accordingly.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5507 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
carneiro 2011-03-24 21:27:16 +00:00
parent e47513f043
commit 47279ee56e
1 changed files with 21 additions and 4 deletions

View File

@ -67,9 +67,12 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
@Argument(fullName="excludeFiltered", shortName="ef", doc="Don't include filtered loci.", required=false)
private boolean EXCLUDE_FILTERED = false;
@Argument(fullName="discordance", shortName = "disc", doc="Output variants that were not called from a ROD comparison track. Use -disc ROD_NAME", required=false)
@Argument(fullName="discordance", shortName = "disc", doc="Output variants that were not called on a ROD comparison track. Use -disc ROD_NAME", required=false)
private String discordanceRodName = "";
@Argument(fullName="concordance", shortName = "conc", doc="Output variants that were also called on a ROD comparison track. Use -conc ROD_NAME", required=false)
private String concordanceRodName = "";
@Argument(fullName="family_structure", shortName="family", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
private String FAMILY_STRUCTURE = "";
@ -86,9 +89,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
private Set<String> samples = new HashSet<String>();
private boolean DISCORDANCE_ONLY = false;
private boolean CONCORDANCE_ONLY = false;
private MendelianViolation mv;
/* default name for the variant dataset (VCF) */
private final String variantRodName = "variant";
/**
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
@ -96,7 +103,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
public void initialize() {
// Get list of samples to include in the output
ArrayList<String> rodNames = new ArrayList<String>();
rodNames.add("variant");
rodNames.add(variantRodName);
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
@ -109,6 +116,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
DISCORDANCE_ONLY = discordanceRodName.length() > 0;
if (DISCORDANCE_ONLY) logger.info("Selecting only variants discordant with the track: " + discordanceRodName);
CONCORDANCE_ONLY = concordanceRodName.length() > 0;
if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceRodName);
if (MENDELIAN_VIOLATIONS)
mv = new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD);
else if (!FAMILY_STRUCTURE.isEmpty()) {
@ -143,7 +153,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
if ( tracker == null )
return 0;
Collection<VariantContext> vcs = tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false);
Collection<VariantContext> vcs = tracker.getVariantContexts(ref, variantRodName, null, context.getLocation(), true, false);
// If a discordance rod name was provided, we select the variants present in the discordance that
// are not present in the variant.
@ -167,7 +177,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
return 0;
}
for (VariantContext vc : vcs) {
if (MENDELIAN_VIOLATIONS) {
if (mv.isViolation(vc)) {
@ -175,6 +184,14 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
}
}
else if (CONCORDANCE_ONLY) {
VariantContext compVC = tracker.getVariantContext(ref, concordanceRodName, context.getLocation());
if (compVC != null) {
vcfWriter.add(vc, ref.getBase());
break; // we only want one line per event in this case.
}
}
else {
VariantContext sub = subsetRecord(vc, samples);
if ( (sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) {