Performance optimizations and caliper benchmarking code for consolidateCigar

-- Now that this function is used in the core of LIBS, it needed some basic optimizations; these are now complete and pass all unit tests.
-- Added caliper benchmark for AlignmentUtils to assess performance (showing new version is 3x-10x faster)
-- Remove unused import in ReadStateManager
This commit is contained in:
Mark DePristo 2013-04-19 17:31:59 -04:00
parent df6ba74395
commit df90597bfc
2 changed files with 26 additions and 4 deletions

View File

@ -28,7 +28,6 @@ package org.broadinstitute.sting.utils.locusiterator;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.picard.util.PeekableIterator;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.*;

View File

@ -581,8 +581,11 @@ public final class AlignmentUtils {
*/
@Ensures({"result != null"})
public static Cigar consolidateCigar( final Cigar c ) {
if( c == null ) { throw new IllegalArgumentException("Cigar cannot be null"); }
if( c.isEmpty() ) { return c; }
if ( c == null ) { throw new IllegalArgumentException("Cigar cannot be null"); }
// fast check to determine if there's anything worth doing before we create new Cigar and actually do some work
if ( ! needsConsolidation(c) )
return c;
final Cigar returnCigar = new Cigar();
int sumLength = 0;
@ -601,13 +604,33 @@ public final class AlignmentUtils {
lastElement = cur;
}
if( sumLength > 0 ) {
if ( sumLength > 0 ) {
returnCigar.add(new CigarElement(sumLength, lastElement.getOperator()));
}
return returnCigar;
}
/**
* Cheap pre-check that decides whether a cigar contains anything to consolidate.
*
* A cigar is flagged when it has at least two elements and either some element
* has length zero, or two neighboring elements carry the same operator.
*
* Cigars with zero or one element are never flagged, even if that single
* element has length zero.
* NOTE(review): confirm that leaving a lone zero-length element untouched is intended.
*
* @param c a non-null cigar
* @return true if the cigar has a zero-length element or adjacent elements with the
*         same operator (and at least two elements overall); false otherwise
*/
private static boolean needsConsolidation(final Cigar c) {
// fewer than two elements: nothing to merge, report as already consolidated
if ( c.numCigarElements() < 2 ) {
return false;
}

// operator of the element visited just before the current one; null until the first element is seen
CigarOperator previousOp = null;
for ( final CigarElement element : c.getCigarElements() ) {
if ( element.getLength() == 0 ) {
return true; // zero-length element found
}
final CigarOperator currentOp = element.getOperator();
if ( currentOp == previousOp ) {
return true; // same operator as the preceding element
}
previousOp = currentOp;
}

return false;
}
/**
* Takes the alignment of the read sequence <code>readSeq</code> to the reference sequence <code>refSeq</code>
* starting at 0-based position <code>refIndex</code> on the <code>refSeq</code> and specified by its <code>cigar</code>.