Renamed variantMergeOptions to filteredRecordsMergeType, as this is really what it does. Cleaned up the wiki so that it's clear what this does, and included an example of how to create an intersection with CombineVariants and SelectVariants. Added integration tests of CombineVariants with OMNI and HapMap that cover the two ways to merge filtered/unfiltered records at the same site.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5860 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
653475ce12
commit
cf3dbfee97
|
|
@ -317,8 +317,8 @@ public class VariantContextUtils {
|
|||
UNIQUIFY, PRIORITIZE, UNSORTED, REQUIRE_UNIQUE
|
||||
}
|
||||
|
||||
public enum VariantMergeType {
|
||||
UNION, INTERSECT, MASTER
|
||||
public enum FilteredRecordMergeType {
|
||||
KEEP_IF_ANY_UNFILTERED, KEEP_IF_ALL_UNFILTERED
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -392,7 +392,7 @@ public class VariantContextUtils {
|
|||
|
||||
|
||||
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, byte refBase) {
|
||||
return simpleMerge(genomeLocParser, unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false, refBase);
|
||||
return simpleMerge(genomeLocParser, unsortedVCs, null, FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GenotypeMergeType.UNSORTED, false, false, refBase);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -403,19 +403,19 @@ public class VariantContextUtils {
|
|||
*
|
||||
* @param unsortedVCs
|
||||
* @param priorityListOfVCs
|
||||
* @param variantMergeOptions
|
||||
* @param filteredRecordMergeType
|
||||
* @param genotypeMergeOptions
|
||||
* @return
|
||||
*/
|
||||
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
||||
VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions,
|
||||
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
|
||||
boolean annotateOrigin, boolean printMessages, byte inputRefBase ) {
|
||||
|
||||
return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, variantMergeOptions, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false, false);
|
||||
return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false, false);
|
||||
}
|
||||
|
||||
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
||||
VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions,
|
||||
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
|
||||
boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey,
|
||||
boolean filteredAreUncalled, boolean mergeInfoWithMaxAC ) {
|
||||
if ( unsortedVCs == null || unsortedVCs.size() == 0 )
|
||||
|
|
@ -536,7 +536,7 @@ public class VariantContextUtils {
|
|||
}
|
||||
|
||||
// if at least one record was unfiltered and we keep sites when any record is unfiltered, clear all of the filters
|
||||
if ( variantMergeOptions == VariantMergeType.UNION && nFiltered != VCs.size() )
|
||||
if ( filteredRecordMergeType == filteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() )
|
||||
filters.clear();
|
||||
|
||||
// we care about where the call came from
|
||||
|
|
|
|||
|
|
@ -62,8 +62,8 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
@Argument(shortName="genotypeMergeOptions", doc="How should we merge genotype records for samples shared across the ROD files?", required=false)
|
||||
public VariantContextUtils.GenotypeMergeType genotypeMergeOption = VariantContextUtils.GenotypeMergeType.PRIORITIZE;
|
||||
|
||||
@Argument(shortName="variantMergeOptions", doc="How should we merge variant records across RODs? Union leaves the record if any record is unfiltered, Intersection requires all records to be unfiltered", required=false)
|
||||
public VariantContextUtils.VariantMergeType variantMergeOption = VariantContextUtils.VariantMergeType.UNION;
|
||||
@Argument(shortName="filteredRecordsMergeType", doc="How should we deal with records seen at the same site in the VCF, but with different FILTER fields? KEEP_IF_ANY_UNFILTERED PASSes the record if any record is unfiltered, KEEP_IF_ALL_UNFILTERED requires all records to be unfiltered", required=false)
|
||||
public VariantContextUtils.FilteredRecordMergeType filteredRecordsMergeType = VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED;
|
||||
|
||||
@Argument(fullName="rod_priority_list", shortName="priority", doc="When taking the union of variants containing genotypes: a comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted; a complete priority list MUST be provided", required=false)
|
||||
public String PRIORITY_STRING = null;
|
||||
|
|
@ -86,6 +86,10 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="minimumN", shortName="minN", doc="Combine variants and output site only if variant is present in at least N input files.", required=false)
|
||||
public int minimumN = 1;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="masterMerge", shortName="master", doc="Master merge mode -- experts only. You need to look at the code to understand it", required=false)
|
||||
public boolean master = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="mergeInfoWithMaxAC", shortName="mergeInfoWithMaxAC", doc="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records.", required=false)
|
||||
public boolean MERGE_INFO_WITH_MAX_AC = false;
|
||||
|
|
@ -157,10 +161,10 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
return 0;
|
||||
|
||||
VariantContext mergedVC = null;
|
||||
if ( variantMergeOption == VariantContextUtils.VariantMergeType.MASTER ) {
|
||||
if ( master ) {
|
||||
mergedVC = VariantContextUtils.masterMerge(vcs, "master");
|
||||
} else {
|
||||
mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, variantMergeOption,
|
||||
mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(),vcs, priority, filteredRecordsMergeType,
|
||||
genotypeMergeOption, true, printComplexMerges, ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -59,6 +59,18 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
executeTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
|
||||
}
|
||||
|
||||
public void combineSites(String args, String md5) {
|
||||
String file1 = "1000G_omni2.5.b37.sites.vcf";
|
||||
String file2 = "hapmap_3.3.b37.sites.vcf";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference
|
||||
+ " -L 1:1-10,000,000 -B:omni,VCF " + validationDataLocation + file1
|
||||
+ " -B:hm3,VCF " + validationDataLocation + file2 + args,
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
|
||||
}
|
||||
|
||||
|
||||
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "2117fff6e0d182cd20be508e9661829c", true); }
|
||||
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "2cfaf7af3dd119df08b8a9c1f72e2f93", " -setKey foo", true); }
|
||||
|
|
@ -76,6 +88,9 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "b3783384b7c8e877b971033e90beba48", true); }
|
||||
|
||||
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "902e541c87caa72134db6293fc46f0ad"); }
|
||||
@Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "f339ad4bb5863b58b9c919ce7d040bb9"); }
|
||||
|
||||
@Test public void threeWayWithRefs() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -B:NA19240_BGI,VCF "+validationDataLocation+"NA19240.BGI.RG.vcf" +
|
||||
|
|
@ -83,7 +98,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
" -B:NA19240_WUGSC,VCF "+validationDataLocation+"NA19240.WUGSC.RG.vcf" +
|
||||
" -B:denovoInfo,VCF "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" +
|
||||
" -setKey centerSet" +
|
||||
" -variantMergeOptions UNION" +
|
||||
" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" +
|
||||
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
||||
" -genotypeMergeOptions UNIQUIFY -L 1"),
|
||||
1,
|
||||
|
|
|
|||
Loading…
Reference in New Issue