First part of fix for correctly processing mixed multi-allelic records: correctly compute start/stop of vc when there are no null alleles (i.e. record is not a simple indel).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5958 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
delangel 2011-06-08 13:36:18 +00:00
parent d27800e07c
commit 1d6486a28f
1 changed files with 21 additions and 3 deletions

View File

@ -446,17 +446,35 @@ public class UnifiedGenotyperEngine {
/**
 * Computes the end position to report for a call made at {@code loc}.
 *
 * The result starts at {@code loc.getStart()}. If every allele has length 1 that start is returned
 * unchanged. Otherwise {@code refAllele.length()} is added, and one is subtracted again when no
 * allele in {@code alleles} reports {@code isNull()}.
 *
 * @param alleles   the alleles at this site; each is inspected for its length and for isNull()
 * @param refAllele the reference allele; only its length is used
 * @param loc       the location of the site; only its start is used
 * @return the computed end position
 */
// NOTE(review): this listing looks like a rendered diff whose +/- markers were stripped, so three
// statements below appear twice (the old line directly followed by its replacement) - confirm
// against the real file before compiling.
private int calculateEndPos(Set<Allele> alleles, Allele refAllele, GenomeLoc loc) {
// TODO - temp fix until we can deal with extended events properly
// for indels, stop location is one more than ref allele length
boolean isSNP = true;
boolean isSNP = true, hasNullAltAllele = false;
// The site is treated as a SNP only if every allele has length exactly 1.
for (Allele a : alleles){
if (a.getBaseString().length() != 1) {
if (a.length() != 1) {
isSNP = false;
break;
}
}
// Look for a null allele. Note this scans the whole allele set that was passed in, not only
// alternate alleles, despite the flag's name.
for (Allele a : alleles){
if (a.isNull()) {
hasNullAltAllele = true;
break;
}
}
// standard deletion: ref allele length = del length, alt allele = null. endLoc = startLoc + refAllele.length()
// standard insertion: ref allele length = 0, endLoc = startLoc
// mixed: want endLoc = startLoc for case {A*,AT,T}, but for say {ATG*,A,T} want endLoc = startLoc + refAllele.length() - 1
// So, in general, endLoc = startLoc + refAllele.length(), except in complex substitutions (no null allele) where it's one less
//
// todo - this is unnecessarily complicated and is so just because of Tribble's arbitrary vc conventions, should be cleaner/simpler,
// the whole vc processing infrastructure seems too brittle and riddled with special case handling
int endLoc = loc.getStart();
if ( !isSNP )
if ( !isSNP) {
endLoc += refAllele.length();
// No null allele present: the ref allele length already counts the start base, so step back by one.
if(!hasNullAltAllele)
endLoc--;
}
return endLoc;
}