From cf3dbfee979d1f0bbb3fa0140615fd2bc4b73abe Mon Sep 17 00:00:00 2001 From: depristo Date: Tue, 24 May 2011 01:54:29 +0000 Subject: [PATCH] Renamed variantMergeOptions to filteredRecordsMergeType, as this is really what it does. Cleaned up the wiki so that it's clear what this does, as well as included an example of how to create an intersection with CombineVariants and SelectVariants. Added integration tests of CombineVariants with OMNI and HapMap that deal with the two ways to merge filtered/unfiltered records at the same site. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5860 348d0f76-0448-11de-a6fe-93d51630548a --- .../variantcontext/VariantContextUtils.java | 16 ++++++++-------- .../walkers/variantutils/CombineVariants.java | 12 ++++++++---- .../CombineVariantsIntegrationTest.java | 17 ++++++++++++++++- 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java index fe6495b64..576373949 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java @@ -317,8 +317,8 @@ public class VariantContextUtils { UNIQUIFY, PRIORITIZE, UNSORTED, REQUIRE_UNIQUE } - public enum VariantMergeType { - UNION, INTERSECT, MASTER + public enum FilteredRecordMergeType { + KEEP_IF_ANY_UNFILTERED, KEEP_IF_ALL_UNFILTERED } /** @@ -392,7 +392,7 @@ public class VariantContextUtils { public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, byte refBase) { - return simpleMerge(genomeLocParser, unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false, refBase); + return simpleMerge(genomeLocParser, unsortedVCs, null, FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, 
GenotypeMergeType.UNSORTED, false, false, refBase); } @@ -403,19 +403,19 @@ public class VariantContextUtils { * * @param unsortedVCs * @param priorityListOfVCs - * @param variantMergeOptions + * @param filteredRecordMergeType * @param genotypeMergeOptions * @return */ public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs, - VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions, + FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions, boolean annotateOrigin, boolean printMessages, byte inputRefBase ) { - return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, variantMergeOptions, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false, false); + return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false, false); } public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs, - VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions, + FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions, boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey, boolean filteredAreUncalled, boolean mergeInfoWithMaxAC ) { if ( unsortedVCs == null || unsortedVCs.size() == 0 ) @@ -536,7 +536,7 @@ public class VariantContextUtils { } // if at least one record was unfiltered and we want a union, clear all of the filters - if ( variantMergeOptions == VariantMergeType.UNION && nFiltered != VCs.size() ) + if ( filteredRecordMergeType == filteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() ) filters.clear(); // we care about where the call came from diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java 
b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 9c89aa142..e905f647e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -62,8 +62,8 @@ public class CombineVariants extends RodWalker { @Argument(shortName="genotypeMergeOptions", doc="How should we merge genotype records for samples shared across the ROD files?", required=false) public VariantContextUtils.GenotypeMergeType genotypeMergeOption = VariantContextUtils.GenotypeMergeType.PRIORITIZE; - @Argument(shortName="variantMergeOptions", doc="How should we merge variant records across RODs? Union leaves the record if any record is unfiltered, Intersection requires all records to be unfiltered", required=false) - public VariantContextUtils.VariantMergeType variantMergeOption = VariantContextUtils.VariantMergeType.UNION; + @Argument(shortName="filteredRecordsMergeType", doc="How should we deal with records seen at the same site in the VCF, but with different FILTER fields? 
KEEP_IF_ANY_UNFILTERED PASSes the record if any record is unfiltered, KEEP_IF_ALL_UNFILTERED requires all records to be unfiltered", required=false) + public VariantContextUtils.FilteredRecordMergeType filteredRecordsMergeType = VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED; @Argument(fullName="rod_priority_list", shortName="priority", doc="When taking the union of variants containing genotypes: a comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted; a complete priority list MUST be provided", required=false) public String PRIORITY_STRING = null; @@ -86,6 +86,10 @@ public class CombineVariants extends RodWalker { @Argument(fullName="minimumN", shortName="minN", doc="Combine variants and output site only if variant is present in at least N input files.", required=false) public int minimumN = 1; + @Hidden + @Argument(fullName="masterMerge", shortName="master", doc="Master merge mode -- experts only. You need to look at the code to understand it", required=false) + public boolean master = false; + @Hidden @Argument(fullName="mergeInfoWithMaxAC", shortName="mergeInfoWithMaxAC", doc="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records.", required=false) public boolean MERGE_INFO_WITH_MAX_AC = false; @@ -157,10 +161,10 @@ public class CombineVariants extends RodWalker { return 0; VariantContext mergedVC = null; - if ( variantMergeOption == VariantContextUtils.VariantMergeType.MASTER ) { + if ( master ) { mergedVC = VariantContextUtils.masterMerge(vcs, "master"); } else { - mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, variantMergeOption, + mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, 
filteredAreUncalled, MERGE_INFO_WITH_MAX_AC); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 8371acce9..33c595f8c 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -59,6 +59,18 @@ public class CombineVariantsIntegrationTest extends WalkerTest { executeTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec); } + public void combineSites(String args, String md5) { + String file1 = "1000G_omni2.5.b37.sites.vcf"; + String file2 = "hapmap_3.3.b37.sites.vcf"; + WalkerTestSpec spec = new WalkerTestSpec( + "-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference + + " -L 1:1-10,000,000 -B:omni,VCF " + validationDataLocation + file1 + + " -B:hm3,VCF " + validationDataLocation + file2 + args, + 1, + Arrays.asList(md5)); + executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); + } + @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "2117fff6e0d182cd20be508e9661829c", true); } @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "2cfaf7af3dd119df08b8a9c1f72e2f93", " -setKey foo", true); } @@ -76,6 +88,9 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "b3783384b7c8e877b971033e90beba48", true); } + @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "902e541c87caa72134db6293fc46f0ad"); } + @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType 
KEEP_IF_ALL_UNFILTERED", "f339ad4bb5863b58b9c919ce7d040bb9"); } + @Test public void threeWayWithRefs() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(" -B:NA19240_BGI,VCF "+validationDataLocation+"NA19240.BGI.RG.vcf" + @@ -83,7 +98,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { " -B:NA19240_WUGSC,VCF "+validationDataLocation+"NA19240.WUGSC.RG.vcf" + " -B:denovoInfo,VCF "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" + " -setKey centerSet" + - " -variantMergeOptions UNION" + + " -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" + " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + " -genotypeMergeOptions UNIQUIFY -L 1"), 1,