From f739305f43a4b8b7f0e47ad30b1ccea3ba2fcf9e Mon Sep 17 00:00:00 2001
From: Joel Thibault
Date: Fri, 20 Apr 2012 14:04:31 -0400
Subject: [PATCH] Combine the variants found at a location

---
 .../walkers/variantutils/SelectVariants.java | 63 ++++++++++++++++++-
 1 file changed, 62 insertions(+), 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
index 7a1381b62..7590a8597 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
@@ -661,7 +661,68 @@ public class SelectVariants extends RodWalker implements TreeR
             vcs.add(builder.make());
         }
 
-        return vcs;
+        return combineVariants(vcs);
+    }
+
+    /**
+     * Merges the variant contexts found at a single location, using the defaults of
+     * CombineVariants (the logic below was copied from that walker).
+     *
+     * The records are split by variant type, NO_VARIATION records are folded into the
+     * first other type present, each group is merged with VariantContextUtils.simpleMerge,
+     * and the chromosome counts are re-computed on every merged record.
+     *
+     * @param vcs the variant contexts to combine
+     * @return the merged records, in VariantContext.Type declaration order; groups whose
+     *         merge yields null are left out
+     */
+    private Collection<VariantContext> combineVariants(ArrayList<VariantContext> vcs) {
+        // defaults from CombineVariants; the merge type is always BY_TYPE (the
+        // CombineVariants default), so only the BY_TYPE path is implemented here
+        // NOTE(review): PRIORITIZE is paired with a null priority list -- confirm simpleMerge accepts that
+        final List<String> priority = null;
+        final VariantContextUtils.FilteredRecordMergeType filteredRecordsMergeType = VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED;
+        final VariantContextUtils.GenotypeMergeType genotypeMergeOption = VariantContextUtils.GenotypeMergeType.PRIORITIZE;
+        final boolean printComplexMerges = false;
+        final String SET_KEY = "set";
+        final boolean filteredAreUncalled = false;
+        final boolean MERGE_INFO_WITH_MAX_AC = false;
+
+        final Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
+
+        // TODO -- clean this up in a refactoring
+        // merge NO_VARIATION into another type of variant (based on the ordering in VariantContext.Type)
+        if ( VCsByType.containsKey(VariantContext.Type.NO_VARIATION) && VCsByType.size() > 1 ) {
+            final List<VariantContext> refs = VCsByType.remove(VariantContext.Type.NO_VARIATION);
+            for ( VariantContext.Type type : VariantContext.Type.values() ) {
+                if ( VCsByType.containsKey(type) ) {
+                    VCsByType.get(type).addAll(refs);
+                    break;
+                }
+            }
+        }
+
+        // iterate over the types so that it's deterministic
+        final List<VariantContext> recomputedVCs = new ArrayList<VariantContext>();
+        for ( VariantContext.Type type : VariantContext.Type.values() ) {
+            if ( !VCsByType.containsKey(type) )
+                continue;
+
+            final VariantContext mergedVC = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
+                    priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
+                    SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC);
+
+            // nothing to emit when the merge yields no record
+            if ( mergedVC == null )
+                continue;
+
+            // re-compute chromosome counts
+            final VariantContextBuilder builder = new VariantContextBuilder(mergedVC);
+            VariantContextUtils.calculateChromosomeCounts(builder, false);
+            recomputedVCs.add(builder.make());
+        }
+
+        return recomputedVCs;
     }
 
     /**