Optimization of the reference window calculation to use bytes, not chars, and to skip uppercasing, since reference and read bases are now always uppercase. Should remove roughly 5% of the runtime of UG.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3438 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-05-26 14:10:26 +00:00
parent 88a06ad81f
commit 5928047d8b
2 changed files with 11 additions and 4 deletions

View File

@ -301,7 +301,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
refBase = ref.getBase();
// Skip if this base is an 'N' or etc.
if( BaseUtils.isRegularBase( (char)(bases[offset]) ) ) {
if( BaseUtils.isRegularBase( bases[offset] ) ) {
// SOLID bams have inserted the reference base into the read if the color space is inconsistent with the read base, so skip it
if( !gatkRead.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || !RecalDataManager.isInconsistentColorSpace( gatkRead, offset ) ) {
@ -310,7 +310,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
updateDataFromRead( gatkRead, offset, refBase );
} else { // calculate SOLID reference insertion rate
if( refBase == (char)bases[offset] ) {
if( refBase == bases[offset] ) {
solidInsertedReferenceBases++;
} else {
otherColorSpaceInconsistency++;

View File

@ -64,6 +64,7 @@ public class AlignmentUtils {
return getMismatchCount(r, refSeq, refIndex).numMismatches;
}
@Deprecated
public static int numMismatches(SAMRecord r, String refSeq, int refIndex ) {
if ( r.getReadUnmappedFlag() ) return 1000000;
return numMismatches(r, StringUtil.stringToBytes(refSeq), refIndex);
@ -73,11 +74,14 @@ public class AlignmentUtils {
return getMismatchCount(r, refSeq, refIndex).mismatchQualities;
}
/**
 * Returns the mismatch-quality total for read {@code r} against a reference given as a String.
 *
 * <p>The reference string is converted to bytes with {@code StringUtil.stringToBytes} and the
 * result is taken from the {@code mismatchQualities} field of the {@code MismatchCount} produced
 * by {@code getMismatchCount}. Previously this overload delegated to {@code numMismatches},
 * which returned the number of mismatches rather than their qualities.
 *
 * @param r        the read to evaluate
 * @param refSeq   reference bases as a String
 * @param refIndex index into {@code refSeq}, passed through unchanged to {@code getMismatchCount}
 * @return 1000000 if the read is flagged as unmapped; otherwise the {@code mismatchQualities}
 *         value computed by {@code getMismatchCount}
 * @deprecated this String-based overload is marked deprecated; presumably callers should pass
 *             the reference as {@code byte[]} instead -- confirm against the byte[] overload.
 */
@Deprecated
public static long mismatchingQualities(SAMRecord r, String refSeq, int refIndex ) {
    // Unmapped reads cannot be compared to the reference; return the same large sentinel as before.
    if ( r.getReadUnmappedFlag() ) return 1000000;
    // Use the mismatch *qualities*, not the mismatch *count* (the old code called numMismatches here).
    return getMismatchCount(r, StringUtil.stringToBytes(refSeq), refIndex).mismatchQualities;
}
// todo -- this code and mismatchesInRefWindow should be combined and optimized into a single
// todo -- high performance implementation. We can do a lot better than this right now
private static MismatchCount getMismatchCount(SAMRecord r, byte[] refSeq, int refIndex) {
MismatchCount mc = new MismatchCount();
@ -188,9 +192,12 @@ public class AlignmentUtils {
if ( ignoreTargetSite && ref.getLocus().getStart() == currentPos )
continue;
char readChr = (char)readBases[readIndex];
if ( Character.toUpperCase(readChr) != Character.toUpperCase(refChr) )
byte readChr = readBases[readIndex];
if ( readChr != refChr )
sum += (qualitySumInsteadOfMismatchCount) ? readQualities[readIndex] : 1;
// char readChr = (char)readBases[readIndex];
// if ( Character.toUpperCase(readChr) != Character.toUpperCase(refChr) )
// sum += (qualitySumInsteadOfMismatchCount) ? readQualities[readIndex] : 1;
}
break;
case I: