From a76fac46878bc87935d5564cf47fd155d6c0771f Mon Sep 17 00:00:00 2001 From: hanna Date: Tue, 13 Oct 2009 21:51:18 +0000 Subject: [PATCH] Cleanup existing speedups. Minor performance improvements. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1823 348d0f76-0448-11de-a6fe-93d51630548a --- .../alignment/bwa/AlignerTestHarness.java | 2 +- .../sting/alignment/bwa/BWAAligner.java | 28 +++++++++++++------ .../sting/alignment/bwa/BWAAlignment.java | 4 +-- .../sting/alignment/bwa/LowerBound.java | 4 +-- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java b/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java index 97e888296..90272db41 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java @@ -47,7 +47,7 @@ public class AlignerTestHarness { for(SAMRecord read: reader) { count++; - if( count > 100000 ) break; + if( count > 200000 ) break; //if( count < 366000 ) continue; //if( count != 2 ) continue; //if( !read.getReadName().endsWith("SL-XBC:1:82:506:404#0") ) diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java index 3bb27eec2..36319fda2 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java @@ -82,8 +82,8 @@ public class BWAAligner implements Aligner { public List align( SAMRecord read ) { List successfulMatches = new ArrayList(); - byte[] uncomplementedBases = read.getReadBases(); - byte[] complementedBases = BaseUtils.reverse(BaseUtils.simpleReverseComplement(uncomplementedBases)); + Byte[] uncomplementedBases = normalizeBases(read.getReadBases()); + Byte[] complementedBases = normalizeBases(BaseUtils.reverse(BaseUtils.simpleReverseComplement(read.getReadBases()))); List forwardLowerBounds = LowerBound.create(uncomplementedBases,forwardBWT); List reverseLowerBounds = LowerBound.create(complementedBases,reverseBWT); @@ -107,7 +107,7 @@ public class BWAAligner implements Aligner { if( alignment.getScore() > bestScore + MISMATCH_PENALTY ) break; - byte[] bases = alignment.negativeStrand ? complementedBases : uncomplementedBases; + Byte[] bases = alignment.negativeStrand ? complementedBases : uncomplementedBases; BWT bwt = alignment.negativeStrand ? forwardBWT : reverseBWT; List lowerBounds = alignment.negativeStrand ? reverseLowerBounds : forwardLowerBounds; @@ -236,11 +236,11 @@ public class BWAAligner implements Aligner { * @param allowMismatch Should mismatching bases be allowed? * @return New alignment representing this position if valid; null otherwise. */ - private List createMatchedAlignments( BWT bwt, BWAAlignment alignment, byte[] bases, boolean allowMismatch ) { + private List createMatchedAlignments( BWT bwt, BWAAlignment alignment, Byte[] bases, boolean allowMismatch ) { List newAlignments = new ArrayList(); List baseChoices = new ArrayList(); - Byte thisBase = Bases.fromASCII(bases[alignment.position+1]); + Byte thisBase = bases[alignment.position+1]; if( allowMismatch ) baseChoices.addAll(Bases.allOf()); @@ -269,7 +269,7 @@ public class BWAAligner implements Aligner { newAlignment.position++; newAlignment.addState(AlignmentState.MATCH_MISMATCH); - if( Bases.fromASCII(bases[newAlignment.position]) == null || base != Bases.fromASCII(bases[newAlignment.position]) ) + if( bases[newAlignment.position] == null || base != bases[newAlignment.position] ) newAlignment.incrementMismatches(); newAlignments.add(newAlignment); @@ -323,13 +323,25 @@ public class BWAAligner implements Aligner { * @param bases Bases to use. * @param bwt BWT to use. */ - private void exactMatch( BWAAlignment alignment, byte[] bases, BWT bwt ) { + private void exactMatch( BWAAlignment alignment, Byte[] bases, BWT bwt ) { while( ++alignment.position < bases.length ) { - byte base = Bases.fromASCII(bases[alignment.position]); + byte base = bases[alignment.position]; alignment.loBound = bwt.counts(base) + bwt.occurrences(base,alignment.loBound-1) + 1; alignment.hiBound = bwt.counts(base) + bwt.occurrences(base,alignment.hiBound); if( alignment.loBound > alignment.hiBound ) return; } } + + /** + * Make each base into A/C/G/T or null if unknown. + * @param bases Base string to normalize. + * @return Array of normalized bases. + */ + private Byte[] normalizeBases( byte[] bases ) { + Byte[] normalBases = new Byte[bases.length]; + for(int i = 0; i < bases.length; i++) + normalBases[i] = Bases.fromASCII(bases[i]); + return normalBases; + } } diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAlignment.java b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAlignment.java index 0b49660f3..fc68773c6 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAlignment.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAlignment.java @@ -196,8 +196,8 @@ public class BWAAlignment implements Alignment, Cloneable { return score > other.score ? 1 : -1; // Otherwise, use the order in which the elements were created. - if(this.creationNumber != other.creationNumber) - return this.creationNumber > other.creationNumber ? -1 : 1; + if(creationNumber != other.creationNumber) + return creationNumber > other.creationNumber ? -1 : 1; return 0; } diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/LowerBound.java b/java/src/org/broadinstitute/sting/alignment/bwa/LowerBound.java index be0ddc1e8..31ca7f5bc 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/LowerBound.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/LowerBound.java @@ -48,12 +48,12 @@ public class LowerBound { /** * Create a non-optimal bound according to the algorithm specified in Figure 3 of the BWA paper. */ - public static List create( byte[] bases, BWT bwt ) { + public static List create( Byte[] bases, BWT bwt ) { List bounds = new ArrayList(); int loIndex = 0, hiIndex = bwt.length(), mismatches = 0; for( int i = bases.length-1; i >= 0; i-- ) { - Byte base = Bases.fromASCII(bases[i]); + Byte base = bases[i]; // Ignore non-ACGT bases. if( base != null ) {