Reduced the number of calls to new ArrayList() in TableRecalibration. This results in a speed up of perhaps up to 6 percent (timed trials are hard).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2112 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2009-11-22 17:24:31 +00:00
parent c9c4999354
commit b24240664f
2 changed files with 26 additions and 23 deletions

View File

@ -85,7 +85,6 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
@Argument(fullName="use_slx_platform", shortName="useSLXPlatform", required=false, doc="Force the platform to be Illumina regardless of what it actually says. For debugging purposes only.")
private boolean USE_SLX_PLATFORM = false;
private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
private ArrayList<Covariate> requestedCovariates; // A list to hold the covariate objects that were requested
private IdentityHashMap<SAMRecord, ReadHashDatum> readDatumHashMap; // A hash map that hashes the read object itself into properties commonly pulled out of the read. Done for optimization purposes.
@ -95,6 +94,8 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
private long countedBases = 0; // Number of bases used in the calculations, used for reporting in the output file
private long skippedSites = 0; // Number of loci skipped because it was a dbSNP site, used for reporting in the output file
//private final String versionNumber = "2.0.0"; // major version, minor version, and build number
//---------------------------------------------------------------------------------------------------------------
//
// initialize
@ -107,6 +108,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
*/
public void initialize() {
//logger.info( "CovariateCounterWalker version: " + versionNumber );
// Get a list of all available covariates
final List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);
@ -298,11 +300,10 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
sizeOfReadDatumHashMap++;
}
if( readDatum.mappingQuality > 0 ) { // BUGBUG: turn this into a read filter after passing the old integration tests
// skip first and last base because there is no dinuc
// BUGBUG: Technically we only have to skip the first base on forward reads and the last base on negative strand reads. Change after passing old integration tests.
// BUGBUG: Technically we only have to skip the first base on forward reads and the last base on negative strand reads. Change after passing old integration tests.
if( offset > 0 ) {
if( offset < readDatum.length - 1 ) {
// skip if base quality is zero

View File

@ -92,12 +92,14 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
private RecalDataManager dataManager;
private ArrayList<Covariate> requestedCovariates;
private ArrayList<Comparable> fullCovariateKey; // the list that will be used over and over again to hold the full set of covariate values
private ArrayList<Comparable> collapsedTableKey; // the key that will be used over and over again to query the collapsed tables
private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
private static Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*");
//private final String versionNumber = "2.0.0"; // major version, minor version, and build number
//---------------------------------------------------------------------------------------------------------------
//
@ -112,6 +114,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
*/
public void initialize() {
//logger.info( "TableRecalibrationWalker version: " + versionNumber );
// Get a list of all available covariates
List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);
@ -130,6 +133,9 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
Utils.warnUser("A dbSNP rod file was specified but TableRecalibrationWalker doesn't make use of it.");
}
fullCovariateKey = new ArrayList<Comparable>();
collapsedTableKey = new ArrayList<Comparable>();
// Read in the covariates that were used from the input file
requestedCovariates = new ArrayList<Covariate>();
@ -194,7 +200,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
} catch ( FileNotFoundException e ) {
Utils.scareUser("Can not find input file: " + RECAL_FILE);
} catch ( NumberFormatException e ) {
throw new StingException("Error parsing recalibration data at line " + lineNumber + ". Perhaps your table generated by an older version of CovariateCounterWalker.");
throw new StingException("Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker.");
}
logger.info( "...done!" );
@ -287,13 +293,14 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
// For each base in the read
for( int iii = startPos; iii < stopPos; iii++ ) { // skip first or last base because there is no dinuc depending on the direction of the read
List<Comparable> key = new ArrayList<Comparable>();
// Get the covariate values which make up the key
for( Covariate covariate : requestedCovariates ) {
key.add( covariate.getValue( readDatum, iii ) ); // offset is zero based so passing iii is correct here
fullCovariateKey.add( covariate.getValue( readDatum, iii ) ); // offset is zero based so passing iii is correct here
}
recalQuals[iii] = performSequentialQualityCalculation( key );
recalQuals[iii] = performSequentialQualityCalculation( fullCovariateKey );
fullCovariateKey.clear();
}
preserveQScores( originalQuals, recalQuals ); // overwrite the work done if original quality score is too low
@ -332,22 +339,21 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
String readGroupKeyElement = key.get(0).toString();
int qualityScoreKeyElement = Integer.parseInt(key.get(1).toString());
byte qualFromRead = (byte)qualityScoreKeyElement;
ArrayList<Comparable> newKey = new ArrayList<Comparable>();
// The global quality shift (over the read group only)
newKey.add( readGroupKeyElement );
RecalDatum globalDeltaQDatum = dataManager.getCollapsedTable(0).get( newKey );
Double globalDeltaQEmpirical = dataManager.getCollapsedDoubleTable(0).get( newKey );
collapsedTableKey.add( readGroupKeyElement );
RecalDatum globalDeltaQDatum = dataManager.getCollapsedTable(0).get(collapsedTableKey);
Double globalDeltaQEmpirical = dataManager.getCollapsedDoubleTable(0).get(collapsedTableKey);
double globalDeltaQ = 0.0;
double aggregrateQreported = 0.0;
if( globalDeltaQDatum != null ) {
aggregrateQreported = QualityUtils.phredScaleErrorRate( dataManager.dataSumExpectedErrors.get( newKey ) / ((double) globalDeltaQDatum.getNumObservations()) );
aggregrateQreported = QualityUtils.phredScaleErrorRate( dataManager.dataSumExpectedErrors.get(collapsedTableKey) / ((double) globalDeltaQDatum.getNumObservations()) );
globalDeltaQ = globalDeltaQEmpirical - aggregrateQreported;
}
// The shift in quality between reported and empirical
newKey.add( qualityScoreKeyElement );
Double deltaQReportedEmpirical = dataManager.getCollapsedDoubleTable(1).get( newKey );
collapsedTableKey.add( qualityScoreKeyElement );
Double deltaQReportedEmpirical = dataManager.getCollapsedDoubleTable(1).get(collapsedTableKey);
double deltaQReported = 0.0;
if( deltaQReportedEmpirical != null ) {
deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
@ -359,8 +365,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
double deltaQPos = 0.0;
double deltaQDinuc = 0.0;
for( int iii = 2; iii < key.size(); iii++ ) {
newKey.add( key.get(iii) ); // the given covariate
deltaQCovariateEmpirical = dataManager.getCollapsedDoubleTable(iii).get( newKey );
collapsedTableKey.add( key.get(iii) ); // the given covariate
deltaQCovariateEmpirical = dataManager.getCollapsedDoubleTable(iii).get(collapsedTableKey);
if( deltaQCovariateEmpirical != null ) {
deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
if( VALIDATE_OLD_RECALIBRATOR ) {
@ -368,7 +374,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
if(iii==3) { deltaQDinuc = deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported); }
}
}
newKey.remove( 2 ); // this new covariate is always added in at position 2 in the newKey list
collapsedTableKey.remove( 2 ); // this new covariate is always added in at position 2 in the collapsedTableKey list
}
double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
@ -385,11 +391,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
// key.get(0).toString(), key.get(3).toString(), key.get(2).toString(), key.get(1).toString(), qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte) );
//}
if( newQualityByte <= 0 && newQualityByte >= QualityUtils.MAX_REASONABLE_Q_SCORE ) {
throw new StingException( "Illegal base quality score calculated: " + key +
String.format( " => %d + %.2f + %.2f + %.2f = %d", qualFromRead, globalDeltaQ, deltaQReported, deltaQCovariates, newQualityByte ) );
}
collapsedTableKey.clear();
return newQualityByte;
}