Make BaseRecalibration thread-safe
-- In the process uncovered two strange things
1 -- qualityScoreByFullCovariateKey was created but never used. Seems like a cache?
2 -- Discovered nasty bug in BaseRecalibrator: https://jira.broadinstitute.org/browse/GSA-534
This commit is contained in:
parent
27ddebee53
commit
c9ea213c9b
|
|
@ -27,12 +27,11 @@ package org.broadinstitute.sting.utils.recalibration;
|
|||
|
||||
import net.sf.samtools.SAMTag;
|
||||
import net.sf.samtools.SAMUtils;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -46,7 +45,6 @@ import java.io.File;
|
|||
|
||||
public class BaseRecalibration {
|
||||
private final static int MAXIMUM_RECALIBRATED_READ_LENGTH = 5000;
|
||||
private final ReadCovariates readCovariates;
|
||||
|
||||
private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
|
||||
private final RecalibrationTables recalibrationTables;
|
||||
|
|
@ -56,10 +54,23 @@ public class BaseRecalibration {
|
|||
private final int preserveQLessThan;
|
||||
private final boolean emitOriginalQuals;
|
||||
|
||||
private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
||||
static {
|
||||
for (int i = 0; i < EventType.values().length; i++)
|
||||
qualityScoreByFullCovariateKey[i] = new NestedHashMap();
|
||||
// TODO -- was this supposed to be used somewhere?
|
||||
// private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
||||
// static {
|
||||
// for (int i = 0; i < EventType.values().length; i++)
|
||||
// qualityScoreByFullCovariateKey[i] = new NestedHashMap();
|
||||
// }
|
||||
|
||||
/**
|
||||
* Thread local cache to allow multi-threaded use of this class
|
||||
*/
|
||||
private ThreadLocal<ReadCovariates> readCovariatesCache;
|
||||
{
|
||||
readCovariatesCache = new ThreadLocal<ReadCovariates> () {
|
||||
@Override protected ReadCovariates initialValue() {
|
||||
return new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -81,7 +92,6 @@ public class BaseRecalibration {
|
|||
else if (quantizationLevels > 0 && quantizationLevels != quantizationInfo.getQuantizationLevels()) // any other positive value means, we want a different quantization than the one pre-calculated in the recalibration report. Negative values mean the user did not provide a quantization argument, and just wnats to use what's in the report.
|
||||
quantizationInfo.quantizeQualityScores(quantizationLevels);
|
||||
|
||||
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||
this.disableIndelQuals = disableIndelQuals;
|
||||
this.preserveQLessThan = preserveQLessThan;
|
||||
this.emitOriginalQuals = emitOriginalQuals;
|
||||
|
|
@ -104,6 +114,11 @@ public class BaseRecalibration {
|
|||
}
|
||||
|
||||
// Compute all covariates for the read
|
||||
// TODO -- the need to clear here suggests there's an error in the indexing / assumption code
|
||||
// TODO -- for BI and DI. Perhaps due to the indel buffer size on the ends of the reads?
|
||||
// TODO -- the output varies with -nt 1 and -nt 2 if you don't call clear here
|
||||
// TODO -- needs to be fixed.
|
||||
final ReadCovariates readCovariates = readCovariatesCache.get().clear();
|
||||
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates);
|
||||
|
||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||
|
|
@ -130,6 +145,7 @@ public class BaseRecalibration {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Implements a serial recalibration of the reads using the combinational table.
|
||||
* First, we perform a positional recalibration, and then a subsequent dinuc correction.
|
||||
|
|
@ -147,7 +163,7 @@ public class BaseRecalibration {
|
|||
* @param errorModel the event type
|
||||
* @return A recalibrated quality score as a byte
|
||||
*/
|
||||
protected byte performSequentialQualityCalculation(final int[] key, final EventType errorModel) {
|
||||
private byte performSequentialQualityCalculation(final int[] key, final EventType errorModel) {
|
||||
|
||||
final byte qualFromRead = (byte)(long)key[1];
|
||||
final double globalDeltaQ = calculateGlobalDeltaQ(recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE), key, errorModel);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* The object temporarily held by a read that describes all of it's covariates.
|
||||
*
|
||||
|
|
@ -21,6 +23,17 @@ public class ReadCovariates {
|
|||
currentCovariateIndex = index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Necessary due to bug in BaseRecalibration recalibrateRead function. It is clearly seeing space it's not supposed to
|
||||
* @return
|
||||
*/
|
||||
public ReadCovariates clear() {
|
||||
for ( int i = 0; i < keys.length; i++ )
|
||||
for ( int j = 0; j < keys[i].length; j++)
|
||||
Arrays.fill(keys[i][j], 0);
|
||||
return this;
|
||||
}
|
||||
|
||||
public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) {
|
||||
keys[EventType.BASE_SUBSTITUTION.index][readOffset][currentCovariateIndex] = mismatch;
|
||||
keys[EventType.BASE_INSERTION.index][readOffset][currentCovariateIndex] = insertion;
|
||||
|
|
|
|||
Loading…
Reference in New Issue