Make BaseRecalibration thread-safe
-- In the process uncovered two strange things
1 -- qualityScoreByFullCovariateKey was created but never used. Seems like a cache?
2 -- Discovered nasty bug in BaseRecalibrator: https://jira.broadinstitute.org/browse/GSA-534
This commit is contained in:
parent
27ddebee53
commit
c9ea213c9b
|
|
@ -27,12 +27,11 @@ package org.broadinstitute.sting.utils.recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.SAMTag;
|
import net.sf.samtools.SAMTag;
|
||||||
import net.sf.samtools.SAMUtils;
|
import net.sf.samtools.SAMUtils;
|
||||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -46,7 +45,6 @@ import java.io.File;
|
||||||
|
|
||||||
public class BaseRecalibration {
|
public class BaseRecalibration {
|
||||||
private final static int MAXIMUM_RECALIBRATED_READ_LENGTH = 5000;
|
private final static int MAXIMUM_RECALIBRATED_READ_LENGTH = 5000;
|
||||||
private final ReadCovariates readCovariates;
|
|
||||||
|
|
||||||
private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
|
private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
|
||||||
private final RecalibrationTables recalibrationTables;
|
private final RecalibrationTables recalibrationTables;
|
||||||
|
|
@ -56,10 +54,23 @@ public class BaseRecalibration {
|
||||||
private final int preserveQLessThan;
|
private final int preserveQLessThan;
|
||||||
private final boolean emitOriginalQuals;
|
private final boolean emitOriginalQuals;
|
||||||
|
|
||||||
private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
// TODO -- was this supposed to be used somewhere?
|
||||||
static {
|
// private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
||||||
for (int i = 0; i < EventType.values().length; i++)
|
// static {
|
||||||
qualityScoreByFullCovariateKey[i] = new NestedHashMap();
|
// for (int i = 0; i < EventType.values().length; i++)
|
||||||
|
// qualityScoreByFullCovariateKey[i] = new NestedHashMap();
|
||||||
|
// }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Thread local cache to allow multi-threaded use of this class
|
||||||
|
*/
|
||||||
|
private ThreadLocal<ReadCovariates> readCovariatesCache;
|
||||||
|
{
|
||||||
|
readCovariatesCache = new ThreadLocal<ReadCovariates> () {
|
||||||
|
@Override protected ReadCovariates initialValue() {
|
||||||
|
return new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -81,7 +92,6 @@ public class BaseRecalibration {
|
||||||
else if (quantizationLevels > 0 && quantizationLevels != quantizationInfo.getQuantizationLevels()) // any other positive value means, we want a different quantization than the one pre-calculated in the recalibration report. Negative values mean the user did not provide a quantization argument, and just wnats to use what's in the report.
|
else if (quantizationLevels > 0 && quantizationLevels != quantizationInfo.getQuantizationLevels()) // any other positive value means, we want a different quantization than the one pre-calculated in the recalibration report. Negative values mean the user did not provide a quantization argument, and just wnats to use what's in the report.
|
||||||
quantizationInfo.quantizeQualityScores(quantizationLevels);
|
quantizationInfo.quantizeQualityScores(quantizationLevels);
|
||||||
|
|
||||||
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
|
||||||
this.disableIndelQuals = disableIndelQuals;
|
this.disableIndelQuals = disableIndelQuals;
|
||||||
this.preserveQLessThan = preserveQLessThan;
|
this.preserveQLessThan = preserveQLessThan;
|
||||||
this.emitOriginalQuals = emitOriginalQuals;
|
this.emitOriginalQuals = emitOriginalQuals;
|
||||||
|
|
@ -104,6 +114,11 @@ public class BaseRecalibration {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compute all covariates for the read
|
// Compute all covariates for the read
|
||||||
|
// TODO -- the need to clear here suggests there's an error in the indexing / assumption code
|
||||||
|
// TODO -- for BI and DI. Perhaps due to the indel buffer size on the ends of the reads?
|
||||||
|
// TODO -- the output varies with -nt 1 and -nt 2 if you don't call clear here
|
||||||
|
// TODO -- needs to be fixed.
|
||||||
|
final ReadCovariates readCovariates = readCovariatesCache.get().clear();
|
||||||
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates);
|
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates);
|
||||||
|
|
||||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||||
|
|
@ -130,6 +145,7 @@ public class BaseRecalibration {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Implements a serial recalibration of the reads using the combinational table.
|
* Implements a serial recalibration of the reads using the combinational table.
|
||||||
* First, we perform a positional recalibration, and then a subsequent dinuc correction.
|
* First, we perform a positional recalibration, and then a subsequent dinuc correction.
|
||||||
|
|
@ -147,7 +163,7 @@ public class BaseRecalibration {
|
||||||
* @param errorModel the event type
|
* @param errorModel the event type
|
||||||
* @return A recalibrated quality score as a byte
|
* @return A recalibrated quality score as a byte
|
||||||
*/
|
*/
|
||||||
protected byte performSequentialQualityCalculation(final int[] key, final EventType errorModel) {
|
private byte performSequentialQualityCalculation(final int[] key, final EventType errorModel) {
|
||||||
|
|
||||||
final byte qualFromRead = (byte)(long)key[1];
|
final byte qualFromRead = (byte)(long)key[1];
|
||||||
final double globalDeltaQ = calculateGlobalDeltaQ(recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE), key, errorModel);
|
final double globalDeltaQ = calculateGlobalDeltaQ(recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE), key, errorModel);
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
package org.broadinstitute.sting.utils.recalibration;
|
package org.broadinstitute.sting.utils.recalibration;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The object temporarily held by a read that describes all of it's covariates.
|
* The object temporarily held by a read that describes all of it's covariates.
|
||||||
*
|
*
|
||||||
|
|
@ -21,6 +23,17 @@ public class ReadCovariates {
|
||||||
currentCovariateIndex = index;
|
currentCovariateIndex = index;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Necessary due to bug in BaseRecalibration recalibrateRead function. It is clearly seeing space it's not supposed to
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public ReadCovariates clear() {
|
||||||
|
for ( int i = 0; i < keys.length; i++ )
|
||||||
|
for ( int j = 0; j < keys[i].length; j++)
|
||||||
|
Arrays.fill(keys[i][j], 0);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) {
|
public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) {
|
||||||
keys[EventType.BASE_SUBSTITUTION.index][readOffset][currentCovariateIndex] = mismatch;
|
keys[EventType.BASE_SUBSTITUTION.index][readOffset][currentCovariateIndex] = mismatch;
|
||||||
keys[EventType.BASE_INSERTION.index][readOffset][currentCovariateIndex] = insertion;
|
keys[EventType.BASE_INSERTION.index][readOffset][currentCovariateIndex] = insertion;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue