Experimental change in how we classify indels. Up to now, an indel of, say, AA was counted as a 2-mer repeat expansion. In reality, if the event is surrounded by A's, it is really a multiple-monomer expansion. So we first reduce the indel bases to their minimal repeating unit (in case they are made of repeated elements) before classifying them.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4939 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
delangel 2011-01-05 17:13:18 +00:00
parent 4ac0590744
commit d203f5e39a
1 changed files with 27 additions and 4 deletions

View File

@ -312,17 +312,39 @@ public class IndelStatistics extends VariantEvaluator {
/**
 * Reduces an indel allele string to its smallest repeating unit.
 *
 * For example, "AA" reduces to "A" and "ACAC" reduces to "AC", while a string
 * that is not a whole-number repetition of a shorter prefix (e.g. "ACG") is
 * returned unchanged. Strings of length 0 or 1 are returned as-is.
 *
 * The comparison is literal: characters in the allele string are never
 * interpreted as regular-expression syntax.
 *
 * @param eventString the inserted or deleted bases; must not be null
 * @return the shortest prefix whose repetition reproduces eventString, or
 *         eventString itself when no shorter unit exists
 */
private String findMinimalEvent(String eventString) {
    final int length = eventString.length();
    // Try candidate unit sizes from smallest to largest, so the first hit is the minimal unit.
    for (int k = 1; k < length; k++) {
        // A unit of size k can only tile the string if k divides its length.
        if (length % k != 0)
            continue;
        // The string is a repetition of its first k characters exactly when it
        // equals itself shifted by k positions (given that k divides the length).
        if (eventString.regionMatches(k, eventString, 0, length - k))
            return eventString.substring(0, k);
    }
    return eventString;
}
/*
 * increment the specified value
 */
public void incrValue(VariantContext vc, ReferenceContext ref) {
int eventLength = 0;
boolean isInsertion = false, isDeletion = false;
String indelAlleleString;
if ( vc.isInsertion() ) {
eventLength = vc.getAlternateAllele(0).length();
isInsertion = true;
indelAlleleString = vc.getAlternateAllele(0).getDisplayString();
} else if ( vc.isDeletion() ) {
eventLength = vc.getReference().length();
isDeletion = true;
indelAlleleString = vc.getReference().getDisplayString();
}
@ -334,8 +356,9 @@ public class IndelStatistics extends VariantEvaluator {
byte[] refBases = ref.getBases();
indelAlleleString = findMinimalEvent(indelAlleleString);
eventLength = indelAlleleString.length();
// See first if indel is a repetition of bases before current
int indStart = refBases.length/2-eventLength+1;