First part of fix for correctly processing mixed multi-allelic records: correctly compute start/stop of vc when there are no null alleles (i.e. record is not a simple indel).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5958 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d27800e07c
commit
1d6486a28f
|
|
@ -446,17 +446,35 @@ public class UnifiedGenotyperEngine {
|
|||
/**
 * Computes the end (stop) position of a variant record that starts at {@code loc.getStart()}.
 *
 * NOTE(review): this listing appears to be a rendered diff, so superseded and replacement
 * lines sit next to each other (e.g. the two {@code isSNP} declarations and the paired
 * {@code if} headers) -- confirm against the actual file before editing the logic.
 *
 * Replacement logic as shown below: when every allele has length 1 the end equals the start.
 * Otherwise the end is the start plus {@code refAllele.length()}, reduced by one when no
 * allele in {@code alleles} reports {@code isNull()}.
 *
 * @param alleles   alleles inspected for length and for {@code isNull()}
 * @param refAllele reference allele whose length extends the end position for non-SNP records
 * @param loc       location supplying the start position
 * @return the computed end position
 */
private int calculateEndPos(Set<Allele> alleles, Allele refAllele, GenomeLoc loc) {
|
||||
// TODO - temp fix until we can deal with extended events properly
|
||||
// for indels, stop location is one more than ref allele length
|
||||
boolean isSNP = true;
|
||||
boolean isSNP = true, hasNullAltAllele = false;
|
||||
// Any allele whose length differs from 1 means the record is not treated as a SNP.
for (Allele a : alleles){
|
||||
if (a.getBaseString().length() != 1) {
|
||||
if (a.length() != 1) {
|
||||
isSNP = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// NOTE(review): despite the flag's name, this scans every allele in the set; whether the
// set also contains the reference allele is not visible here -- confirm with the caller.
for (Allele a : alleles){
|
||||
if (a.isNull()) {
|
||||
hasNullAltAllele = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// standard deletion: ref allele length = del length. endLoc = startLoc + refAllele.length(), alt allele = null
|
||||
// standard insertion: ref allele length = 0, endLoc = startLoc
|
||||
// mixed: want end loc = start Loc for case {A*,AT,T} but say {ATG*,A,T} : want then end loc = start loc + refAllele.length
|
||||
// So, in general, end loc = startLoc + refAllele.length, except in complex substitutions where it's one less
|
||||
//
|
||||
// todo - this is unnecessarily complicated and is so just because of Tribble's arbitrary vc conventions, should be cleaner/simpler,
|
||||
// the whole vc processing infrastructure seems too brittle and riddled with special case handling
|
||||
|
||||
|
||||
int endLoc = loc.getStart();
|
||||
if ( !isSNP )
|
||||
if ( !isSNP) {
|
||||
endLoc += refAllele.length();
|
||||
// No null allele found: step the end back by one (the "one less" case described above).
if(!hasNullAltAllele)
|
||||
endLoc--;
|
||||
|
||||
}
|
||||
|
||||
return endLoc;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue