From f37564e63aaa157f6f5a8544ba051260a37490d0 Mon Sep 17 00:00:00 2001 From: hanna Date: Tue, 13 Oct 2009 15:50:04 +0000 Subject: [PATCH] Our BWA is now looking at roughly the same number of candidate alignments as BWA/C. Performance is now at 11k reads / min, still a long way from BWA/C. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1817 348d0f76-0448-11de-a6fe-93d51630548a --- .../alignment/bwa/AlignerTestHarness.java | 6 +- .../sting/alignment/bwa/BWAAligner.java | 71 +++++++++++++------ 2 files changed, 55 insertions(+), 22 deletions(-) diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java b/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java index c78afcbbf..8d6c6d086 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java @@ -47,8 +47,8 @@ public class AlignerTestHarness { for(SAMRecord read: reader) { count++; - //if( count > 144000 ) break; - if( count < 366000 ) continue; + //if( count > 100000 ) break; + //if( count < 366000 ) continue; //if( count != 2 ) continue; //if( !read.getReadName().endsWith("SL-XBC:1:82:506:404#0") ) // continue; @@ -95,6 +95,8 @@ public class AlignerTestHarness { //System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), foundAlignment.getAlignmentStart(), foundAlignment.getMismatches(), foundAlignment.getGapOpens(), foundAlignment.getGapExtensions()); } else { + System.out.printf("Error aligning read %s%n", read.getReadName()); + mismatches++; IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile); diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java index 741636b98..b05d2a2a8 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java @@ -88,13 +88,6 @@ public class BWAAligner implements Aligner { List forwardLowerBounds = LowerBound.create(uncomplementedBases,forwardBWT); List reverseLowerBounds = LowerBound.create(complementedBases,reverseBWT); - /* - for( int i = 0; i < forwardLowerBounds.size(); i++ ) - System.out.printf("ForwardBWT: lb[%d] = %s%n",i,forwardLowerBounds.get(i)); - for( int i = 0; i < reverseLowerBounds.size(); i++ ) - System.out.printf("ReverseBWT: lb[%d] = %s%n",i,reverseLowerBounds.get(i)); - */ - // Seed the best score with any score that won't overflow on comparison. int bestScore = Integer.MAX_VALUE - MISMATCH_PENALTY; int bestDiff = MAXIMUM_EDIT_DISTANCE+1; @@ -118,8 +111,19 @@ public class BWAAligner implements Aligner { BWT bwt = alignment.negativeStrand ? forwardBWT : reverseBWT; List lowerBounds = alignment.negativeStrand ? reverseLowerBounds : forwardLowerBounds; + // if z < D(i) then return {} + int mismatches = maxDiff - alignment.mismatches - alignment.gapOpens - alignment.gapExtensions; + if( alignment.position < lowerBounds.size()-1 && mismatches < lowerBounds.get(alignment.position+1).value ) + continue; + + if(mismatches == 0) { + exactMatch(alignment,bases,bwt); + if(alignment.loBound > alignment.hiBound) + continue; + } + // Found a valid alignment; store it and move on. - if(alignment.position == read.getReadLength()-1) { + if(alignment.position >= read.getReadLength()-1) { for( int bwtIndex = alignment.loBound; bwtIndex <= alignment.hiBound; bwtIndex++ ) { BWAAlignment finalAlignment = alignment.clone(); @@ -143,13 +147,31 @@ public class BWAAligner implements Aligner { } //System.out.printf("Processing alignments; queue size = %d, alignment = %s, bound = %d, base = %s%n", alignments.size(), alignment, lowerBounds.get(alignment.position+1).value, alignment.position >= 0 ? (char)bases[alignment.position] : ""); + /* + System.out.printf("#1\t[%d,%d,%d,%c]\t[%d,%d,%d]\t[%d,%d]\t[%d,%d]%n",alignments.size(), + alignment.negativeStrand?1:0, + bases.length-alignment.position-1, + alignment.getCurrentState().toString().charAt(0), + alignment.getMismatches(), + alignment.getGapOpens(), + alignment.getGapExtensions(), + lowerBounds.get(alignment.position+1).value, + lowerBounds.get(alignment.position+1).width, + alignment.loBound, + alignment.hiBound); + */ - // if z < D(i) then return {} - int mismatches = maxDiff - alignment.mismatches - alignment.gapOpens - alignment.gapExtensions; - if( mismatches < lowerBounds.get(alignment.position+1).value ) - continue; + // Temporary -- look ahead to see if the next alignment is bounded. + boolean allowDifferences = mismatches > 0; + boolean allowMismatches = mismatches > 0; + if( alignment.position+1 < read.getReadLength()-1 ) { + allowDifferences &= lowerBounds.get(alignment.position+2).value <= mismatches - 1; + allowMismatches &= + !(lowerBounds.get(alignment.position+2).value == mismatches-1 && lowerBounds.get(alignment.position+1).value == mismatches-1 && + lowerBounds.get(alignment.position+2).width == lowerBounds.get(alignment.position+1).width); + } - if( mismatches > 0 && + if( allowDifferences && alignment.position+1 >= INDEL_END_SKIP-1+alignment.gapOpens+alignment.gapExtensions && read.getReadLength()-1-(alignment.position+1) >= INDEL_END_SKIP+alignment.gapOpens+alignment.gapExtensions ) { if( alignment.getCurrentState() == AlignmentState.MATCH_MISMATCH ) { @@ -186,13 +208,7 @@ public class BWAAligner implements Aligner { } // Mismatches - // Temporary -- look ahead to see if the next alignment is bounded. - boolean allowMismatches = mismatches > 0; - if( alignment.position+1 < read.getReadLength()-1 ) - allowMismatches &= - !(lowerBounds.get(alignment.position+2).value == mismatches-1 && lowerBounds.get(alignment.position+1).value == mismatches-1 && - lowerBounds.get(alignment.position+2).width == lowerBounds.get(alignment.position+1).width); - alignments.addAll(createMatchedAlignments(bwt,alignment,bases,allowMismatches)); + alignments.addAll(createMatchedAlignments(bwt,alignment,bases,allowDifferences&&allowMismatches)); } return successfulMatches; @@ -302,4 +318,19 @@ public class BWAAligner implements Aligner { return newAlignments; } + /** + * Exactly match the given alignment against the given BWT. + * @param alignment Alignment to match. + * @param bases Bases to use. + * @param bwt BWT to use. + */ + private void exactMatch( BWAAlignment alignment, byte[] bases, BWT bwt ) { + while( ++alignment.position < bases.length ) { + Base base = Base.fromASCII(bases[alignment.position]); + alignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1; + alignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound); + if( alignment.loBound > alignment.hiBound ) + return; + } + } }