Reduced the number of calls to new ArrayList() in TableRecalibration. This results in a speed up of perhaps up to 6 percent (timed trials are hard).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2112 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c9c4999354
commit
b24240664f
|
|
@ -85,7 +85,6 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
@Argument(fullName="use_slx_platform", shortName="useSLXPlatform", required=false, doc="Force the platform to be Illumina regardless of what it actually says. For debugging purposes only.")
|
||||
private boolean USE_SLX_PLATFORM = false;
|
||||
|
||||
|
||||
private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
|
||||
private ArrayList<Covariate> requestedCovariates; // A list to hold the covariate objects that were requested
|
||||
private IdentityHashMap<SAMRecord, ReadHashDatum> readDatumHashMap; // A hash map that hashes the read object itself into properties commonly pulled out of the read. Done for optimization purposes.
|
||||
|
|
@ -95,6 +94,8 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
private long countedBases = 0; // Number of bases used in the calculations, used for reporting in the output file
|
||||
private long skippedSites = 0; // Number of loci skipped because it was a dbSNP site, used for reporting in the output file
|
||||
|
||||
//private final String versionNumber = "2.0.0"; // major version, minor version, and build number
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// initialize
|
||||
|
|
@ -107,6 +108,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
*/
|
||||
public void initialize() {
|
||||
|
||||
//logger.info( "CovariateCounterWalker version: " + versionNumber );
|
||||
// Get a list of all available covariates
|
||||
final List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);
|
||||
|
||||
|
|
@ -298,11 +300,10 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
sizeOfReadDatumHashMap++;
|
||||
}
|
||||
|
||||
|
||||
if( readDatum.mappingQuality > 0 ) { // BUGBUG: turn this into a read filter after passing the old integration tests
|
||||
|
||||
// skip first and last base because there is no dinuc
|
||||
// BUGBUG: Technically we only have to skip the first base on forward reads and the last base on negative strand reads. Change after passing old integration tests.
|
||||
// BUGBUG: Technically we only have to skip the first base on forward reads and the last base on negative strand reads. Change after passing old integration tests.
|
||||
if( offset > 0 ) {
|
||||
if( offset < readDatum.length - 1 ) {
|
||||
// skip if base quality is zero
|
||||
|
|
|
|||
|
|
@ -92,12 +92,14 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
private RecalDataManager dataManager;
|
||||
private ArrayList<Covariate> requestedCovariates;
|
||||
private ArrayList<Comparable> fullCovariateKey; // the list that will be used over and over again to hold the full set of covariate values
|
||||
private ArrayList<Comparable> collapsedTableKey; // the key that will be used over and over again to query the collapsed tables
|
||||
|
||||
private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
||||
private static Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
|
||||
private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*");
|
||||
|
||||
|
||||
//private final String versionNumber = "2.0.0"; // major version, minor version, and build number
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -112,6 +114,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
*/
|
||||
public void initialize() {
|
||||
|
||||
//logger.info( "TableRecalibrationWalker version: " + versionNumber );
|
||||
// Get a list of all available covariates
|
||||
List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);
|
||||
|
||||
|
|
@ -130,6 +133,9 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
Utils.warnUser("A dbSNP rod file was specified but TableRecalibrationWalker doesn't make use of it.");
|
||||
}
|
||||
|
||||
fullCovariateKey = new ArrayList<Comparable>();
|
||||
collapsedTableKey = new ArrayList<Comparable>();
|
||||
|
||||
// Read in the covariates that were used from the input file
|
||||
requestedCovariates = new ArrayList<Covariate>();
|
||||
|
||||
|
|
@ -194,7 +200,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
} catch ( FileNotFoundException e ) {
|
||||
Utils.scareUser("Can not find input file: " + RECAL_FILE);
|
||||
} catch ( NumberFormatException e ) {
|
||||
throw new StingException("Error parsing recalibration data at line " + lineNumber + ". Perhaps your table generated by an older version of CovariateCounterWalker.");
|
||||
throw new StingException("Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker.");
|
||||
}
|
||||
logger.info( "...done!" );
|
||||
|
||||
|
|
@ -287,13 +293,14 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
// For each base in the read
|
||||
for( int iii = startPos; iii < stopPos; iii++ ) { // skip first or last base because there is no dinuc depending on the direction of the read
|
||||
List<Comparable> key = new ArrayList<Comparable>();
|
||||
|
||||
// Get the covariate values which make up the key
|
||||
for( Covariate covariate : requestedCovariates ) {
|
||||
key.add( covariate.getValue( readDatum, iii ) ); // offset is zero based so passing iii is correct here
|
||||
fullCovariateKey.add( covariate.getValue( readDatum, iii ) ); // offset is zero based so passing iii is correct here
|
||||
}
|
||||
|
||||
recalQuals[iii] = performSequentialQualityCalculation( key );
|
||||
recalQuals[iii] = performSequentialQualityCalculation( fullCovariateKey );
|
||||
fullCovariateKey.clear();
|
||||
}
|
||||
|
||||
preserveQScores( originalQuals, recalQuals ); // overwrite the work done if original quality score is too low
|
||||
|
|
@ -332,22 +339,21 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
String readGroupKeyElement = key.get(0).toString();
|
||||
int qualityScoreKeyElement = Integer.parseInt(key.get(1).toString());
|
||||
byte qualFromRead = (byte)qualityScoreKeyElement;
|
||||
ArrayList<Comparable> newKey = new ArrayList<Comparable>();
|
||||
|
||||
// The global quality shift (over the read group only)
|
||||
newKey.add( readGroupKeyElement );
|
||||
RecalDatum globalDeltaQDatum = dataManager.getCollapsedTable(0).get( newKey );
|
||||
Double globalDeltaQEmpirical = dataManager.getCollapsedDoubleTable(0).get( newKey );
|
||||
collapsedTableKey.add( readGroupKeyElement );
|
||||
RecalDatum globalDeltaQDatum = dataManager.getCollapsedTable(0).get(collapsedTableKey);
|
||||
Double globalDeltaQEmpirical = dataManager.getCollapsedDoubleTable(0).get(collapsedTableKey);
|
||||
double globalDeltaQ = 0.0;
|
||||
double aggregrateQreported = 0.0;
|
||||
if( globalDeltaQDatum != null ) {
|
||||
aggregrateQreported = QualityUtils.phredScaleErrorRate( dataManager.dataSumExpectedErrors.get( newKey ) / ((double) globalDeltaQDatum.getNumObservations()) );
|
||||
aggregrateQreported = QualityUtils.phredScaleErrorRate( dataManager.dataSumExpectedErrors.get(collapsedTableKey) / ((double) globalDeltaQDatum.getNumObservations()) );
|
||||
globalDeltaQ = globalDeltaQEmpirical - aggregrateQreported;
|
||||
}
|
||||
|
||||
// The shift in quality between reported and empirical
|
||||
newKey.add( qualityScoreKeyElement );
|
||||
Double deltaQReportedEmpirical = dataManager.getCollapsedDoubleTable(1).get( newKey );
|
||||
collapsedTableKey.add( qualityScoreKeyElement );
|
||||
Double deltaQReportedEmpirical = dataManager.getCollapsedDoubleTable(1).get(collapsedTableKey);
|
||||
double deltaQReported = 0.0;
|
||||
if( deltaQReportedEmpirical != null ) {
|
||||
deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
|
||||
|
|
@ -359,8 +365,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
double deltaQPos = 0.0;
|
||||
double deltaQDinuc = 0.0;
|
||||
for( int iii = 2; iii < key.size(); iii++ ) {
|
||||
newKey.add( key.get(iii) ); // the given covariate
|
||||
deltaQCovariateEmpirical = dataManager.getCollapsedDoubleTable(iii).get( newKey );
|
||||
collapsedTableKey.add( key.get(iii) ); // the given covariate
|
||||
deltaQCovariateEmpirical = dataManager.getCollapsedDoubleTable(iii).get(collapsedTableKey);
|
||||
if( deltaQCovariateEmpirical != null ) {
|
||||
deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
|
||||
if( VALIDATE_OLD_RECALIBRATOR ) {
|
||||
|
|
@ -368,7 +374,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
if(iii==3) { deltaQDinuc = deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported); }
|
||||
}
|
||||
}
|
||||
newKey.remove( 2 ); // this new covariate is always added in at position 2 in the newKey list
|
||||
collapsedTableKey.remove( 2 ); // this new covariate is always added in at position 2 in the collapsedTableKey list
|
||||
}
|
||||
|
||||
double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
|
||||
|
|
@ -385,11 +391,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
// key.get(0).toString(), key.get(3).toString(), key.get(2).toString(), key.get(1).toString(), qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte) );
|
||||
//}
|
||||
|
||||
if( newQualityByte <= 0 && newQualityByte >= QualityUtils.MAX_REASONABLE_Q_SCORE ) {
|
||||
throw new StingException( "Illegal base quality score calculated: " + key +
|
||||
String.format( " => %d + %.2f + %.2f + %.2f = %d", qualFromRead, globalDeltaQ, deltaQReported, deltaQCovariates, newQualityByte ) );
|
||||
}
|
||||
|
||||
collapsedTableKey.clear();
|
||||
return newQualityByte;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue