Refine the way CombineVariants merges records of different types. Previously, two records of different types were never combined and were kept separate. This is still the case, except when the alleles of one record are a strict subset of the alleles of another record. For example, a SNP with alleles {A*,T} and a mixed record with alleles {A*,T,AAT} are now combined when their start positions match.
This commit is contained in:
parent
5b1bf6e244
commit
a942fa38ef
|
|
@ -234,16 +234,47 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN))
|
||||
return 0;
|
||||
|
||||
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
|
||||
List<VariantContext> preMergedVCs = new ArrayList<VariantContext>();
|
||||
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
|
||||
// iterate over the types so that it's deterministic
|
||||
for ( VariantContext.Type type : VariantContext.Type.values() ) {
|
||||
if ( VCsByType.containsKey(type) )
|
||||
mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
|
||||
preMergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
|
||||
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
||||
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
|
||||
}
|
||||
|
||||
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
|
||||
// We have records merged but separated by type. If a particular record is, for example, a SNP but all of its alleles are a subset of an existing mixed record,
|
||||
// we will still merge those records.
|
||||
if (preMergedVCs.size() > 1) {
|
||||
for (VariantContext vc1 : preMergedVCs) {
|
||||
VariantContext newvc = vc1;
|
||||
boolean merged = false;
|
||||
for (int k=0; k < mergedVCs.size(); k++) {
|
||||
VariantContext vc2 = mergedVCs.get(k);
|
||||
|
||||
if (VariantContextUtils.allelesAreSubset(vc1,vc2) || VariantContextUtils.allelesAreSubset(vc2,vc1)) {
|
||||
// all alleles of vc1 are contained in vc2 (or vice versa) but they are of different types (say, vc1 is a SNP, vc2 is complex): try to merge vc1 into vc2
|
||||
List<VariantContext> vcpair = new ArrayList<VariantContext>();
|
||||
vcpair.add(vc1);
|
||||
vcpair.add(vc2);
|
||||
newvc = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), vcpair,
|
||||
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
||||
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC);
|
||||
mergedVCs.set(k,newvc);
|
||||
merged = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!merged)
|
||||
mergedVCs.add(vc1);
|
||||
}
|
||||
}
|
||||
else {
|
||||
mergedVCs = preMergedVCs;
|
||||
}
|
||||
|
||||
for ( VariantContext mergedVC : mergedVCs ) {
|
||||
// only operate at the start of events
|
||||
if ( mergedVC == null )
|
||||
|
|
|
|||
|
|
@ -663,6 +663,18 @@ public class VariantContextUtils {
|
|||
return merged;
|
||||
}
|
||||
|
||||
public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) {
|
||||
// if all alleles of vc1 are a contained in alleles of vc2, return true
|
||||
if (!vc1.getReference().equals(vc2.getReference()))
|
||||
return false;
|
||||
|
||||
for (Allele a :vc1.getAlternateAlleles()) {
|
||||
if (!vc2.getAlternateAlleles().contains(a))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
|
||||
// see if we need to trim common reference base from all alleles
|
||||
boolean trimVC;
|
||||
|
|
|
|||
Loading…
Reference in New Issue