Optimization: from 22k reads/min - 30k reads/min.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1822 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-10-13 20:59:29 +00:00
parent 96b8499a31
commit 837ae1d33a
3 changed files with 87 additions and 44 deletions

View File

@ -112,7 +112,7 @@ public class BWAAligner implements Aligner {
List<LowerBound> lowerBounds = alignment.negativeStrand ? reverseLowerBounds : forwardLowerBounds; List<LowerBound> lowerBounds = alignment.negativeStrand ? reverseLowerBounds : forwardLowerBounds;
// if z < D(i) then return {} // if z < D(i) then return {}
int mismatches = maxDiff - alignment.mismatches - alignment.gapOpens - alignment.gapExtensions; int mismatches = maxDiff - alignment.getMismatches() - alignment.getGapOpens() - alignment.getGapExtensions();
if( alignment.position < lowerBounds.size()-1 && mismatches < lowerBounds.get(alignment.position+1).value ) if( alignment.position < lowerBounds.size()-1 && mismatches < lowerBounds.get(alignment.position+1).value )
continue; continue;
@ -139,7 +139,7 @@ public class BWAAligner implements Aligner {
successfulMatches.add(finalAlignment); successfulMatches.add(finalAlignment);
bestScore = Math.min(finalAlignment.getScore(),bestScore); bestScore = Math.min(finalAlignment.getScore(),bestScore);
bestDiff = Math.min(finalAlignment.mismatches+finalAlignment.gapOpens+finalAlignment.gapExtensions,bestDiff); bestDiff = Math.min(finalAlignment.getMismatches()+finalAlignment.getGapOpens()+finalAlignment.getGapExtensions(),bestDiff);
maxDiff = bestDiff + 1; maxDiff = bestDiff + 1;
} }
@ -172,36 +172,36 @@ public class BWAAligner implements Aligner {
} }
if( allowDifferences && if( allowDifferences &&
alignment.position+1 >= INDEL_END_SKIP-1+alignment.gapOpens+alignment.gapExtensions && alignment.position+1 >= INDEL_END_SKIP-1+alignment.getGapOpens()+alignment.getGapExtensions() &&
read.getReadLength()-1-(alignment.position+1) >= INDEL_END_SKIP+alignment.gapOpens+alignment.gapExtensions ) { read.getReadLength()-1-(alignment.position+1) >= INDEL_END_SKIP+alignment.getGapOpens()+alignment.getGapExtensions() ) {
if( alignment.getCurrentState() == AlignmentState.MATCH_MISMATCH ) { if( alignment.getCurrentState() == AlignmentState.MATCH_MISMATCH ) {
if( alignment.gapOpens < MAXIMUM_GAP_OPENS ) { if( alignment.getGapOpens() < MAXIMUM_GAP_OPENS ) {
// Add a potential insertion extension. // Add a potential insertion extension.
BWAAlignment insertionAlignment = createInsertionAlignment(alignment); BWAAlignment insertionAlignment = createInsertionAlignment(alignment);
insertionAlignment.gapOpens++; insertionAlignment.incrementGapOpens();
alignments.add(insertionAlignment); alignments.add(insertionAlignment);
// Add a potential deletion by marking a deletion and augmenting the position. // Add a potential deletion by marking a deletion and augmenting the position.
List<BWAAlignment> deletionAlignments = createDeletionAlignments(bwt,alignment); List<BWAAlignment> deletionAlignments = createDeletionAlignments(bwt,alignment);
for( BWAAlignment deletionAlignment: deletionAlignments ) for( BWAAlignment deletionAlignment: deletionAlignments )
deletionAlignment.gapOpens++; deletionAlignment.incrementGapOpens();
alignments.addAll(deletionAlignments); alignments.addAll(deletionAlignments);
} }
} }
else if( alignment.getCurrentState() == AlignmentState.INSERTION ) { else if( alignment.getCurrentState() == AlignmentState.INSERTION ) {
if( alignment.gapExtensions < MAXIMUM_GAP_EXTENSIONS && mismatches > 0 ) { if( alignment.getGapExtensions() < MAXIMUM_GAP_EXTENSIONS && mismatches > 0 ) {
// Add a potential insertion extension. // Add a potential insertion extension.
BWAAlignment insertionAlignment = createInsertionAlignment(alignment); BWAAlignment insertionAlignment = createInsertionAlignment(alignment);
insertionAlignment.gapExtensions++; insertionAlignment.incrementGapExtensions();
alignments.add(insertionAlignment); alignments.add(insertionAlignment);
} }
} }
else if( alignment.getCurrentState() == AlignmentState.DELETION ) { else if( alignment.getCurrentState() == AlignmentState.DELETION ) {
if( alignment.gapExtensions < MAXIMUM_GAP_EXTENSIONS && mismatches > 0 ) { if( alignment.getGapExtensions() < MAXIMUM_GAP_EXTENSIONS && mismatches > 0 ) {
// Add a potential deletion by marking a deletion and augmenting the position. // Add a potential deletion by marking a deletion and augmenting the position.
List<BWAAlignment> deletionAlignments = createDeletionAlignments(bwt,alignment); List<BWAAlignment> deletionAlignments = createDeletionAlignments(bwt,alignment);
for( BWAAlignment deletionAlignment: deletionAlignments ) for( BWAAlignment deletionAlignment: deletionAlignments )
deletionAlignment.gapExtensions++; deletionAlignment.incrementGapExtensions();
alignments.addAll(deletionAlignments); alignments.addAll(deletionAlignments);
} }
} }
@ -225,7 +225,6 @@ public class BWAAligner implements Aligner {
seed.position = -1; seed.position = -1;
seed.loBound = 0; seed.loBound = 0;
seed.hiBound = bwt.length(); seed.hiBound = bwt.length();
seed.mismatches = 0;
return seed; return seed;
} }
@ -271,7 +270,7 @@ public class BWAAligner implements Aligner {
newAlignment.position++; newAlignment.position++;
newAlignment.addState(AlignmentState.MATCH_MISMATCH); newAlignment.addState(AlignmentState.MATCH_MISMATCH);
if( Bases.fromASCII(bases[newAlignment.position]) == null || base != Bases.fromASCII(bases[newAlignment.position]) ) if( Bases.fromASCII(bases[newAlignment.position]) == null || base != Bases.fromASCII(bases[newAlignment.position]) )
newAlignment.mismatches++; newAlignment.incrementMismatches();
newAlignments.add(newAlignment); newAlignments.add(newAlignment);
} }

View File

@ -50,17 +50,17 @@ public class BWAAlignment implements Alignment, Cloneable {
/** /**
* Working variable. How many mismatches have been encountered at this point. * Working variable. How many mismatches have been encountered at this point.
*/ */
protected int mismatches; private int mismatches;
/** /**
* Number of gap opens in alignment. * Number of gap opens in alignment.
*/ */
protected int gapOpens; private int gapOpens;
/** /**
* Number of gap extensions in alignment. * Number of gap extensions in alignment.
*/ */
protected int gapExtensions; private int gapExtensions;
/** /**
* Working variable. The lower bound of the alignment within the BWT. * Working variable. The lower bound of the alignment within the BWT.
@ -72,6 +72,11 @@ public class BWAAlignment implements Alignment, Cloneable {
*/ */
protected int hiBound; protected int hiBound;
/**
* Cache the score.
*/
private int score;
/** /**
* Gets the starting position for the given alignment. * Gets the starting position for the given alignment.
* @return Starting position. * @return Starting position.
@ -113,13 +118,35 @@ public class BWAAlignment implements Alignment, Cloneable {
* @return BWA-style scores. 0 is best. * @return BWA-style scores. 0 is best.
*/ */
public int getScore() { public int getScore() {
return mismatches*aligner.MISMATCH_PENALTY + gapOpens*aligner.GAP_OPEN_PENALTY + gapExtensions*aligner.GAP_EXTENSION_PENALTY; return score;
} }
public int getMismatches() { return mismatches; } public int getMismatches() { return mismatches; }
public int getGapOpens() { return gapOpens; } public int getGapOpens() { return gapOpens; }
public int getGapExtensions() { return gapExtensions; } public int getGapExtensions() { return gapExtensions; }
public void incrementMismatches() {
this.mismatches++;
updateScore();
}
public void incrementGapOpens() {
this.gapOpens++;
updateScore();
}
public void incrementGapExtensions() {
this.gapExtensions++;
updateScore();
}
/**
* Updates the score based on new information about matches / mismatches.
*/
private void updateScore() {
score = mismatches*aligner.MISMATCH_PENALTY + gapOpens*aligner.GAP_OPEN_PENALTY + gapExtensions*aligner.GAP_EXTENSION_PENALTY;
}
/** /**
* Create a new alignment with the given parent aligner. * Create a new alignment with the given parent aligner.
* @param aligner Aligner being used. * @param aligner Aligner being used.
@ -164,12 +191,15 @@ public class BWAAlignment implements Alignment, Cloneable {
public int compareTo(Alignment rhs) { public int compareTo(Alignment rhs) {
BWAAlignment other = (BWAAlignment)rhs; BWAAlignment other = (BWAAlignment)rhs;
// If the scores are equal, use the score to disambiguate. // If the scores are different, disambiguate using the score.
int scoreComparison = Integer.valueOf(getScore()).compareTo(other.getScore()); if(score != other.score)
if( scoreComparison != 0 ) return score > other.score ? 1 : -1;
return scoreComparison;
return -Long.valueOf(this.creationNumber).compareTo(other.creationNumber); // Otherwise, use the order in which the elements were created.
if(this.creationNumber != other.creationNumber)
return this.creationNumber > other.creationNumber ? -1 : 1;
return 0;
} }
public String toString() { public String toString() {

View File

@ -13,10 +13,15 @@ import java.util.Map;
*/ */
public class Counts implements Cloneable { public class Counts implements Cloneable {
/** /**
* Internal representation of counts, broken down by pack value. * Internal representation of counts, broken down by ASCII value.
*/ */
private Map<Byte,Integer> counts = new HashMap<Byte,Integer>(); private Map<Byte,Integer> counts = new HashMap<Byte,Integer>();
/**
* Internal representation of cumulative counts, broken down by ASCII value.
*/
private Map<Byte,Integer> cumulativeCounts = new HashMap<Byte,Integer>();
/** /**
* Create an empty Counts object with values A=0,C=0,G=0,T=0. * Create an empty Counts object with values A=0,C=0,G=0,T=0.
*/ */
@ -28,16 +33,22 @@ public class Counts implements Cloneable {
* @param cumulative Whether the counts are cumulative, (count_G=numA+numC+numG,for example). * @param cumulative Whether the counts are cumulative, (count_G=numA+numC+numG,for example).
*/ */
public Counts( int[] data, boolean cumulative ) { public Counts( int[] data, boolean cumulative ) {
for( byte base: Bases.instance)
counts.put(base,data[Bases.toPack(base)]);
// De-cumulatize data as necessary.
if(cumulative) { if(cumulative) {
int previousCount = 0; int priorCount = 0;
for( byte base: Bases.instance ) { for(byte base: Bases.instance) {
int count = counts.get(base); int count = data[Bases.toPack(base)];
counts.put(base,count-previousCount); counts.put(base,count-priorCount);
previousCount = count; cumulativeCounts.put(base,priorCount);
priorCount = count;
}
}
else {
int priorCount = 0;
for(byte base: Bases.instance) {
int count = data[Bases.toPack(base)];
counts.put(base,count);
cumulativeCounts.put(base,priorCount);
priorCount += count;
} }
} }
} }
@ -49,11 +60,18 @@ public class Counts implements Cloneable {
*/ */
public int[] toArray(boolean cumulative) { public int[] toArray(boolean cumulative) {
int[] countArray = new int[counts.size()]; int[] countArray = new int[counts.size()];
for(byte base: Bases.instance)
countArray[Bases.toPack(base)] = counts.get(base);
if(cumulative) { if(cumulative) {
for( int i = 1; i < countArray.length; i++ ) int index = 0;
countArray[i] += countArray[i-1]; for(byte base: Bases.instance) {
if(index++ == 0) continue;
countArray[index] = getCumulative(base);
}
countArray[countArray.length-1] = getTotal();
}
else {
int index = 0;
for(byte base: Bases.instance)
countArray[index] = get(base);
} }
return countArray; return countArray;
} }
@ -101,12 +119,7 @@ public class Counts implements Cloneable {
* @return Number of bases of this type seen. * @return Number of bases of this type seen.
*/ */
public int getCumulative(byte base) { public int getCumulative(byte base) {
int accum = 0; return cumulativeCounts.get(base);
for( byte current: Bases.allOf() ) {
if(base == current) break;
accum += counts.get(current);
}
return accum;
} }
/** /**
@ -115,8 +128,9 @@ public class Counts implements Cloneable {
*/ */
public int getTotal() { public int getTotal() {
int accumulator = 0; int accumulator = 0;
for( int count : counts.values() ) for(byte base: Bases.instance) {
accumulator += count; accumulator += get(base);
}
return accumulator; return accumulator;
} }
} }