diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 7062f17e5..3e3b29a7f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -234,16 +234,47 @@ public class CombineVariants extends RodWalker { if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN)) return 0; - List mergedVCs = new ArrayList(); + List preMergedVCs = new ArrayList(); Map> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs); // iterate over the types so that it's deterministic for ( VariantContext.Type type : VariantContext.Type.values() ) { if ( VCsByType.containsKey(type) ) - mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type), + preMergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type), priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); } + List mergedVCs = new ArrayList(); + // we have records merged but separated by type. If a particular record is for example a snp but all alleles are a subset of an existing mixed record, + // we will still merge those records. NOTE(review): the loop over mergedVCs below checks for null entries, so preMergedVCs can presumably hold nulls; allelesAreSubset dereferences both records - confirm a null can never reach the pairwise pass. 
+ if (preMergedVCs.size() > 1) { + for (VariantContext vc1 : preMergedVCs) { + VariantContext newvc = vc1; + boolean merged = false; + for (int k=0; k < mergedVCs.size(); k++) { + VariantContext vc2 = mergedVCs.get(k); + + if (VariantContextUtils.allelesAreSubset(vc1,vc2) || VariantContextUtils.allelesAreSubset(vc2,vc1)) { + // the alleles of one record are a subset of the other's but they are of different type (say, vc1 is snp, vc2 is complex): try to merge vc1 into vc2 + List vcpair = new ArrayList(); + vcpair.add(vc1); + vcpair.add(vc2); + newvc = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), vcpair, + priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, + SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC); + mergedVCs.set(k,newvc); + merged = true; + break; + } + } + if (!merged) + mergedVCs.add(vc1); + } + } + else { + mergedVCs = preMergedVCs; + } + for ( VariantContext mergedVC : mergedVCs ) { // only operate at the start of events if ( mergedVC == null ) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 986d6305c..506bb3b33 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -663,6 +663,18 @@ public class VariantContextUtils { return merged; } + public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) { + // return true if vc1 and vc2 have equal reference alleles and every alternate allele of vc1 is also an alternate allele of vc2 + if (!vc1.getReference().equals(vc2.getReference())) + return false; + + for (Allele a :vc1.getAlternateAlleles()) { + if (!vc2.getAlternateAlleles().contains(a)) + return false; + } + + return true; + } public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) { // see if we need to trim common reference 
base from all alleles boolean trimVC;