From b45b1d5f2b34212339ee4fa1e040afea348b032b Mon Sep 17 00:00:00 2001 From: ebanks Date: Tue, 9 Jun 2009 04:33:15 +0000 Subject: [PATCH] border case bug fixes git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@951 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/indels/IntervalCleanerWalker.java | 11 +++++------ .../sting/playground/indels/AlignmentUtils.java | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java index 631324297..cd8098296 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java @@ -539,7 +539,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker switch ( ce.getOperator() ) { case M: for (int k = 0 ; k < ce.getLength() ; k++, refIdx++, altIdx++ ) { - if ( Character.toUpperCase(readStr.charAt(altIdx)) != Character.toUpperCase(reference.charAt(refIdx)) ) + if ( refIdx < reference.length() && Character.toUpperCase(readStr.charAt(altIdx)) != Character.toUpperCase(reference.charAt(refIdx)) ) cleanedMismatchBases[refIdx] += (int)qualStr.charAt(altIdx) - 33; } break; @@ -626,7 +626,6 @@ public class IntervalCleanerWalker extends LocusWindowWalker // position, we will move insertion left, to the position right after CA. This way, while moving the indel across the repeat // on the ref, we can theoretically move it across a non-repeat on the read if the latter has a mismtach. - while ( period < indel_length ) { // we will always get at least trivial period = indelStringLength period = BaseUtils.sequencePeriod(indelString, period+1); @@ -634,16 +633,16 @@ public class IntervalCleanerWalker extends LocusWindowWalker if ( indel_length % period != 0 ) continue; // if indel sequence length is not a multiple of the period, it's not gonna work int newIndex = indelIndexOnRef; - + while ( newIndex >= period ) { // let's see if there is a repeat, i.e. if we could also say that same bases at lower position are deleted - + // lets check if bases [newIndex-period,newIndex) immediately preceding the indel on the ref // are the same as the currently checked period of the inserted sequence: boolean match = true; for ( int testRefPos = newIndex - period, indelPos = 0 ; testRefPos < newIndex; testRefPos++, indelPos++) { - if ( Character.toUpperCase(refSeq.charAt(testRefPos)) != indelString.charAt(indelPos) ) { + if ( Character.toUpperCase(refSeq.charAt(testRefPos)) != indelString.charAt(indelPos) || indelString.charAt(indelPos) == 'N' ) { match = false; break; } @@ -654,7 +653,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker } final int newDifference = indelIndexOnRef - newIndex; - if ( newDifference > difference ) difference = newDifference; // deletion should be moved 'difference' bases left + if ( newDifference > difference ) difference = newDifference; // deletion should be moved 'difference' bases left if ( period == 1 ) break; // we do not have to check all periods of homonucleotide sequences, we already // got maximum possible shift after checking period=1 above. diff --git a/java/src/org/broadinstitute/sting/playground/indels/AlignmentUtils.java b/java/src/org/broadinstitute/sting/playground/indels/AlignmentUtils.java index bd6887e3f..af565b0c6 100644 --- a/java/src/org/broadinstitute/sting/playground/indels/AlignmentUtils.java +++ b/java/src/org/broadinstitute/sting/playground/indels/AlignmentUtils.java @@ -123,7 +123,7 @@ public class AlignmentUtils { switch ( ce.getOperator() ) { case M: for (int j = 0 ; j < ce.getLength() ; j++, refIndex++, readIdx++ ) { - if ( Character.toUpperCase(readSeq.charAt(readIdx)) != Character.toUpperCase(refSeq.charAt(refIndex)) ) + if ( refIndex < refSeq.length() && Character.toUpperCase(readSeq.charAt(readIdx)) != Character.toUpperCase(refSeq.charAt(refIndex)) ) mismatches++; } break;