From b24240664ff7e7adee4f3b883a24bad67f552648 Mon Sep 17 00:00:00 2001 From: rpoplin Date: Sun, 22 Nov 2009 17:24:31 +0000 Subject: [PATCH] Reduced the number of calls to new ArrayList() in TableRecalibration. This results in a speed up of perhaps up to 6 percent (timed trials are hard). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2112 348d0f76-0448-11de-a6fe-93d51630548a --- .../Recalibration/CovariateCounterWalker.java | 7 ++-- .../TableRecalibrationWalker.java | 42 ++++++++++--------- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CovariateCounterWalker.java index fb979b05a..2ab232713 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CovariateCounterWalker.java @@ -85,7 +85,6 @@ public class CovariateCounterWalker extends LocusWalker { @Argument(fullName="use_slx_platform", shortName="useSLXPlatform", required=false, doc="Force the platform to be Illumina regardless of what it actually says. For debugging purposes only.") private boolean USE_SLX_PLATFORM = false; - private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps private ArrayList requestedCovariates; // A list to hold the covariate objects that were requested private IdentityHashMap readDatumHashMap; // A hash map that hashes the read object itself into properties commonly pulled out of the read. Done for optimization purposes. @@ -95,6 +94,8 @@ public class CovariateCounterWalker extends LocusWalker { private long countedBases = 0; // Number of bases used in the calculations, used for reporting in the output file private long skippedSites = 0; // Number of loci skipped because it was a dbSNP site, used for reporting in the output file + //private final String versionNumber = "2.0.0"; // major version, minor version, and build number + //--------------------------------------------------------------------------------------------------------------- // // initialize @@ -107,6 +108,7 @@ public class CovariateCounterWalker extends LocusWalker { */ public void initialize() { + //logger.info( "CovariateCounterWalker version: " + versionNumber ); // Get a list of all available covariates final List> classes = PackageUtils.getClassesImplementingInterface(Covariate.class); @@ -298,11 +300,10 @@ public class CovariateCounterWalker extends LocusWalker { sizeOfReadDatumHashMap++; } - if( readDatum.mappingQuality > 0 ) { // BUGBUG: turn this into a read filter after passing the old integration tests // skip first and last base because there is no dinuc - // BUGBUG: Technically we only have to skip the first base on forward reads and the last base on negative strand reads. Change after passing old integration tests. + // BUGBUG: Technically we only have to skip the first base on forward reads and the last base on negative strand reads. Change after passing old integration tests. if( offset > 0 ) { if( offset < readDatum.length - 1 ) { // skip if base quality is zero diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/TableRecalibrationWalker.java index d49b0aefe..63d15af6f 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/TableRecalibrationWalker.java @@ -92,12 +92,14 @@ public class TableRecalibrationWalker extends ReadWalker requestedCovariates; + private ArrayList fullCovariateKey; // the list that will be used over and over again to hold the full set of covariate values + private ArrayList collapsedTableKey; // the key that will be used over and over again to query the collapsed tables private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*"); private static Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*"); private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*"); - + //private final String versionNumber = "2.0.0"; // major version, minor version, and build number //--------------------------------------------------------------------------------------------------------------- // @@ -112,6 +114,7 @@ public class TableRecalibrationWalker extends ReadWalker> classes = PackageUtils.getClassesImplementingInterface(Covariate.class); @@ -130,6 +133,9 @@ public class TableRecalibrationWalker extends ReadWalker(); + collapsedTableKey = new ArrayList(); + // Read in the covariates that were used from the input file requestedCovariates = new ArrayList(); @@ -194,7 +200,7 @@ public class TableRecalibrationWalker extends ReadWalker key = new ArrayList(); + // Get the covariate values which make up the key for( Covariate covariate : requestedCovariates ) { - key.add( covariate.getValue( readDatum, iii ) ); // offset is zero based so passing iii is correct here + fullCovariateKey.add( covariate.getValue( readDatum, iii ) ); // offset is zero based so passing iii is correct here } - recalQuals[iii] = performSequentialQualityCalculation( key ); + recalQuals[iii] = performSequentialQualityCalculation( fullCovariateKey ); + fullCovariateKey.clear(); } preserveQScores( originalQuals, recalQuals ); // overwrite the work done if original quality score is too low @@ -332,22 +339,21 @@ public class TableRecalibrationWalker extends ReadWalker newKey = new ArrayList(); // The global quality shift (over the read group only) - newKey.add( readGroupKeyElement ); - RecalDatum globalDeltaQDatum = dataManager.getCollapsedTable(0).get( newKey ); - Double globalDeltaQEmpirical = dataManager.getCollapsedDoubleTable(0).get( newKey ); + collapsedTableKey.add( readGroupKeyElement ); + RecalDatum globalDeltaQDatum = dataManager.getCollapsedTable(0).get(collapsedTableKey); + Double globalDeltaQEmpirical = dataManager.getCollapsedDoubleTable(0).get(collapsedTableKey); double globalDeltaQ = 0.0; double aggregrateQreported = 0.0; if( globalDeltaQDatum != null ) { - aggregrateQreported = QualityUtils.phredScaleErrorRate( dataManager.dataSumExpectedErrors.get( newKey ) / ((double) globalDeltaQDatum.getNumObservations()) ); + aggregrateQreported = QualityUtils.phredScaleErrorRate( dataManager.dataSumExpectedErrors.get(collapsedTableKey) / ((double) globalDeltaQDatum.getNumObservations()) ); globalDeltaQ = globalDeltaQEmpirical - aggregrateQreported; } // The shift in quality between reported and empirical - newKey.add( qualityScoreKeyElement ); - Double deltaQReportedEmpirical = dataManager.getCollapsedDoubleTable(1).get( newKey ); + collapsedTableKey.add( qualityScoreKeyElement ); + Double deltaQReportedEmpirical = dataManager.getCollapsedDoubleTable(1).get(collapsedTableKey); double deltaQReported = 0.0; if( deltaQReportedEmpirical != null ) { deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ; @@ -359,8 +365,8 @@ public class TableRecalibrationWalker extends ReadWalker= QualityUtils.MAX_REASONABLE_Q_SCORE ) { - throw new StingException( "Illegal base quality score calculated: " + key + - String.format( " => %d + %.2f + %.2f + %.2f = %d", qualFromRead, globalDeltaQ, deltaQReported, deltaQCovariates, newQualityByte ) ); - } - + collapsedTableKey.clear(); return newQualityByte; }