Refine the way CombineVariants merges records of different types. Previously, two records of different types were never combined and were kept separate. This is still the case, except when the alleles of one record are a strict subset of the alleles of another record. For example, a SNP with alleles {A*,T} and a mixed record with alleles {A*,T,AAT} are now combined when their start positions match.

This commit is contained in:
Guillermo del Angel 2011-09-15 10:22:28 -04:00
parent 5b1bf6e244
commit a942fa38ef
2 changed files with 45 additions and 2 deletions

View File

@ -234,16 +234,47 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN))
return 0;
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
List<VariantContext> preMergedVCs = new ArrayList<VariantContext>();
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
// iterate over the types so that it's deterministic
for ( VariantContext.Type type : VariantContext.Type.values() ) {
if ( VCsByType.containsKey(type) )
mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
preMergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
}
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
// we now have records merged but separated by type. If a particular record is, for example, a SNP but all of its alleles are a subset of those of an existing mixed record,
// we will still merge those records.
if (preMergedVCs.size() > 1) {
for (VariantContext vc1 : preMergedVCs) {
VariantContext newvc = vc1;
boolean merged = false;
for (int k=0; k < mergedVCs.size(); k++) {
VariantContext vc2 = mergedVCs.get(k);
if (VariantContextUtils.allelesAreSubset(vc1,vc2) || VariantContextUtils.allelesAreSubset(vc2,vc1)) {
// all alleles of one record are contained in the other but they are of different types (say, vc1 is a SNP, vc2 is complex): try to merge vc1 and vc2
List<VariantContext> vcpair = new ArrayList<VariantContext>();
vcpair.add(vc1);
vcpair.add(vc2);
newvc = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), vcpair,
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC);
mergedVCs.set(k,newvc);
merged = true;
break;
}
}
if (!merged)
mergedVCs.add(vc1);
}
}
else {
mergedVCs = preMergedVCs;
}
for ( VariantContext mergedVC : mergedVCs ) {
// only operate at the start of events
if ( mergedVC == null )

View File

@ -663,6 +663,18 @@ public class VariantContextUtils {
return merged;
}
/**
 * Checks whether the alleles of one variant context are contained in those of another.
 *
 * The result is true only when both contexts have equal reference alleles and every
 * alternate allele of {@code vc1} also appears among the alternate alleles of {@code vc2}.
 * The containment is not strict: two contexts with identical allele sets, or a
 * {@code vc1} with no alternate alleles, also yield true.
 *
 * @param vc1 the context whose alleles are tested for containment
 * @param vc2 the context whose alleles are searched
 * @return true if the references are equal and vc1's alternate alleles are all present in vc2
 */
public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) {
    // contexts with different reference alleles can never be subsets of each other
    final boolean sameReference = vc1.getReference().equals(vc2.getReference());
    if (!sameReference) {
        return false;
    }

    // every alternate allele of vc1 must be found among vc2's alternate alleles
    return vc2.getAlternateAlleles().containsAll(vc1.getAlternateAlleles());
}
public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
// see if we need to trim common reference base from all alleles
boolean trimVC;