Our BWA is now looking at roughly the same number of candidate alignments as BWA/C. Performance is now at 11k reads / min, still a long way from BWA/C.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1817 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8d0e057d83
commit
f37564e63a
|
|
@ -47,8 +47,8 @@ public class AlignerTestHarness {
|
||||||
|
|
||||||
for(SAMRecord read: reader) {
|
for(SAMRecord read: reader) {
|
||||||
count++;
|
count++;
|
||||||
//if( count > 144000 ) break;
|
//if( count > 100000 ) break;
|
||||||
if( count < 366000 ) continue;
|
//if( count < 366000 ) continue;
|
||||||
//if( count != 2 ) continue;
|
//if( count != 2 ) continue;
|
||||||
//if( !read.getReadName().endsWith("SL-XBC:1:82:506:404#0") )
|
//if( !read.getReadName().endsWith("SL-XBC:1:82:506:404#0") )
|
||||||
// continue;
|
// continue;
|
||||||
|
|
@ -95,6 +95,8 @@ public class AlignerTestHarness {
|
||||||
//System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), foundAlignment.getAlignmentStart(), foundAlignment.getMismatches(), foundAlignment.getGapOpens(), foundAlignment.getGapExtensions());
|
//System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), foundAlignment.getAlignmentStart(), foundAlignment.getMismatches(), foundAlignment.getGapOpens(), foundAlignment.getGapExtensions());
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
System.out.printf("Error aligning read %s%n", read.getReadName());
|
||||||
|
|
||||||
mismatches++;
|
mismatches++;
|
||||||
|
|
||||||
IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile);
|
IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile);
|
||||||
|
|
|
||||||
|
|
@ -88,13 +88,6 @@ public class BWAAligner implements Aligner {
|
||||||
List<LowerBound> forwardLowerBounds = LowerBound.create(uncomplementedBases,forwardBWT);
|
List<LowerBound> forwardLowerBounds = LowerBound.create(uncomplementedBases,forwardBWT);
|
||||||
List<LowerBound> reverseLowerBounds = LowerBound.create(complementedBases,reverseBWT);
|
List<LowerBound> reverseLowerBounds = LowerBound.create(complementedBases,reverseBWT);
|
||||||
|
|
||||||
/*
|
|
||||||
for( int i = 0; i < forwardLowerBounds.size(); i++ )
|
|
||||||
System.out.printf("ForwardBWT: lb[%d] = %s%n",i,forwardLowerBounds.get(i));
|
|
||||||
for( int i = 0; i < reverseLowerBounds.size(); i++ )
|
|
||||||
System.out.printf("ReverseBWT: lb[%d] = %s%n",i,reverseLowerBounds.get(i));
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Seed the best score with any score that won't overflow on comparison.
|
// Seed the best score with any score that won't overflow on comparison.
|
||||||
int bestScore = Integer.MAX_VALUE - MISMATCH_PENALTY;
|
int bestScore = Integer.MAX_VALUE - MISMATCH_PENALTY;
|
||||||
int bestDiff = MAXIMUM_EDIT_DISTANCE+1;
|
int bestDiff = MAXIMUM_EDIT_DISTANCE+1;
|
||||||
|
|
@ -118,8 +111,19 @@ public class BWAAligner implements Aligner {
|
||||||
BWT bwt = alignment.negativeStrand ? forwardBWT : reverseBWT;
|
BWT bwt = alignment.negativeStrand ? forwardBWT : reverseBWT;
|
||||||
List<LowerBound> lowerBounds = alignment.negativeStrand ? reverseLowerBounds : forwardLowerBounds;
|
List<LowerBound> lowerBounds = alignment.negativeStrand ? reverseLowerBounds : forwardLowerBounds;
|
||||||
|
|
||||||
|
// if z < D(i) then return {}
|
||||||
|
int mismatches = maxDiff - alignment.mismatches - alignment.gapOpens - alignment.gapExtensions;
|
||||||
|
if( alignment.position < lowerBounds.size()-1 && mismatches < lowerBounds.get(alignment.position+1).value )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if(mismatches == 0) {
|
||||||
|
exactMatch(alignment,bases,bwt);
|
||||||
|
if(alignment.loBound > alignment.hiBound)
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Found a valid alignment; store it and move on.
|
// Found a valid alignment; store it and move on.
|
||||||
if(alignment.position == read.getReadLength()-1) {
|
if(alignment.position >= read.getReadLength()-1) {
|
||||||
for( int bwtIndex = alignment.loBound; bwtIndex <= alignment.hiBound; bwtIndex++ ) {
|
for( int bwtIndex = alignment.loBound; bwtIndex <= alignment.hiBound; bwtIndex++ ) {
|
||||||
BWAAlignment finalAlignment = alignment.clone();
|
BWAAlignment finalAlignment = alignment.clone();
|
||||||
|
|
||||||
|
|
@ -143,13 +147,31 @@ public class BWAAligner implements Aligner {
|
||||||
}
|
}
|
||||||
|
|
||||||
//System.out.printf("Processing alignments; queue size = %d, alignment = %s, bound = %d, base = %s%n", alignments.size(), alignment, lowerBounds.get(alignment.position+1).value, alignment.position >= 0 ? (char)bases[alignment.position] : "");
|
//System.out.printf("Processing alignments; queue size = %d, alignment = %s, bound = %d, base = %s%n", alignments.size(), alignment, lowerBounds.get(alignment.position+1).value, alignment.position >= 0 ? (char)bases[alignment.position] : "");
|
||||||
|
/*
|
||||||
|
System.out.printf("#1\t[%d,%d,%d,%c]\t[%d,%d,%d]\t[%d,%d]\t[%d,%d]%n",alignments.size(),
|
||||||
|
alignment.negativeStrand?1:0,
|
||||||
|
bases.length-alignment.position-1,
|
||||||
|
alignment.getCurrentState().toString().charAt(0),
|
||||||
|
alignment.getMismatches(),
|
||||||
|
alignment.getGapOpens(),
|
||||||
|
alignment.getGapExtensions(),
|
||||||
|
lowerBounds.get(alignment.position+1).value,
|
||||||
|
lowerBounds.get(alignment.position+1).width,
|
||||||
|
alignment.loBound,
|
||||||
|
alignment.hiBound);
|
||||||
|
*/
|
||||||
|
|
||||||
// if z < D(i) then return {}
|
// Temporary -- look ahead to see if the next alignment is bounded.
|
||||||
int mismatches = maxDiff - alignment.mismatches - alignment.gapOpens - alignment.gapExtensions;
|
boolean allowDifferences = mismatches > 0;
|
||||||
if( mismatches < lowerBounds.get(alignment.position+1).value )
|
boolean allowMismatches = mismatches > 0;
|
||||||
continue;
|
if( alignment.position+1 < read.getReadLength()-1 ) {
|
||||||
|
allowDifferences &= lowerBounds.get(alignment.position+2).value <= mismatches - 1;
|
||||||
|
allowMismatches &=
|
||||||
|
!(lowerBounds.get(alignment.position+2).value == mismatches-1 && lowerBounds.get(alignment.position+1).value == mismatches-1 &&
|
||||||
|
lowerBounds.get(alignment.position+2).width == lowerBounds.get(alignment.position+1).width);
|
||||||
|
}
|
||||||
|
|
||||||
if( mismatches > 0 &&
|
if( allowDifferences &&
|
||||||
alignment.position+1 >= INDEL_END_SKIP-1+alignment.gapOpens+alignment.gapExtensions &&
|
alignment.position+1 >= INDEL_END_SKIP-1+alignment.gapOpens+alignment.gapExtensions &&
|
||||||
read.getReadLength()-1-(alignment.position+1) >= INDEL_END_SKIP+alignment.gapOpens+alignment.gapExtensions ) {
|
read.getReadLength()-1-(alignment.position+1) >= INDEL_END_SKIP+alignment.gapOpens+alignment.gapExtensions ) {
|
||||||
if( alignment.getCurrentState() == AlignmentState.MATCH_MISMATCH ) {
|
if( alignment.getCurrentState() == AlignmentState.MATCH_MISMATCH ) {
|
||||||
|
|
@ -186,13 +208,7 @@ public class BWAAligner implements Aligner {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mismatches
|
// Mismatches
|
||||||
// Temporary -- look ahead to see if the next alignment is bounded.
|
alignments.addAll(createMatchedAlignments(bwt,alignment,bases,allowDifferences&&allowMismatches));
|
||||||
boolean allowMismatches = mismatches > 0;
|
|
||||||
if( alignment.position+1 < read.getReadLength()-1 )
|
|
||||||
allowMismatches &=
|
|
||||||
!(lowerBounds.get(alignment.position+2).value == mismatches-1 && lowerBounds.get(alignment.position+1).value == mismatches-1 &&
|
|
||||||
lowerBounds.get(alignment.position+2).width == lowerBounds.get(alignment.position+1).width);
|
|
||||||
alignments.addAll(createMatchedAlignments(bwt,alignment,bases,allowMismatches));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return successfulMatches;
|
return successfulMatches;
|
||||||
|
|
@ -302,4 +318,19 @@ public class BWAAligner implements Aligner {
|
||||||
return newAlignments;
|
return newAlignments;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Exactly match the given alignment against the given BWT.
|
||||||
|
* @param alignment Alignment to match.
|
||||||
|
* @param bases Bases to use.
|
||||||
|
* @param bwt BWT to use.
|
||||||
|
*/
|
||||||
|
private void exactMatch( BWAAlignment alignment, byte[] bases, BWT bwt ) {
|
||||||
|
while( ++alignment.position < bases.length ) {
|
||||||
|
Base base = Base.fromASCII(bases[alignment.position]);
|
||||||
|
alignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1;
|
||||||
|
alignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound);
|
||||||
|
if( alignment.loBound > alignment.hiBound )
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue