Initial version of int indexed mapping for BQSR. Will be cleaned up in a bit.
This commit is contained in:
parent
c94c8a9c09
commit
cac72bce91
|
|
@ -664,12 +664,8 @@ public class SAMDataSource {
|
|||
IndexedFastaSequenceFile refReader,
|
||||
BaseRecalibration bqsrApplier,
|
||||
byte defaultBaseQualities) {
|
||||
if (useOriginalBaseQualities || defaultBaseQualities >= 0)
|
||||
// only wrap if we are replacing the original qualities or using a default base quality
|
||||
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities);
|
||||
|
||||
// NOTE: this (and other filtering) should be done before on-the-fly sorting
|
||||
// as there is no reason to sort something that we will end of throwing away
|
||||
// **** NOTE: ALL FILTERING SHOULD BE DONE BEFORE ANY ITERATORS THAT MODIFY THE READS! ****
|
||||
// (otherwise we will process something that we may end up throwing away)
|
||||
if (downsamplingFraction != null)
|
||||
wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction);
|
||||
|
||||
|
|
@ -678,14 +674,18 @@ public class SAMDataSource {
|
|||
if (!noValidationOfReadOrder && enableVerification)
|
||||
wrappedIterator = new VerifyingSamIterator(genomeLocParser,wrappedIterator);
|
||||
|
||||
wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));
|
||||
|
||||
if (useOriginalBaseQualities || defaultBaseQualities >= 0)
|
||||
// only wrap if we are replacing the original qualities or using a default base quality
|
||||
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities);
|
||||
|
||||
if (bqsrApplier != null)
|
||||
wrappedIterator = new BQSRSamIterator(wrappedIterator, bqsrApplier);
|
||||
|
||||
if (cmode != BAQ.CalculationMode.OFF)
|
||||
wrappedIterator = new BAQSamIterator(refReader, wrappedIterator, cmode, qmode);
|
||||
|
||||
wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));
|
||||
|
||||
return wrappedIterator;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -204,7 +204,7 @@ public class ContextCovariate implements StandardCovariate {
|
|||
private static int keyFromContext(final byte[] dna, final int start, final int end) {
|
||||
|
||||
int key = end - start;
|
||||
int bitOffset = 4;
|
||||
int bitOffset = LENGTH_BITS;
|
||||
for (int i = start; i < end; i++) {
|
||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(dna[i]);
|
||||
if (baseIndex == -1) // ignore non-ACGT bases
|
||||
|
|
@ -227,7 +227,7 @@ public class ContextCovariate implements StandardCovariate {
|
|||
|
||||
final int length = key & LENGTH_MASK; // the first bits represent the length (in bp) of the context
|
||||
int mask = 48; // use the mask to pull out bases
|
||||
int offset = 4;
|
||||
int offset = LENGTH_BITS;
|
||||
|
||||
StringBuilder dna = new StringBuilder();
|
||||
for (int i = 0; i < length; i++) {
|
||||
|
|
@ -239,4 +239,17 @@ public class ContextCovariate implements StandardCovariate {
|
|||
|
||||
return dna.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int maximumKeyValue() {
|
||||
// the maximum value is T (11 in binary) for each base in the context
|
||||
int length = Math.max(mismatchesContextSize, indelsContextSize); // the length of the context
|
||||
int key = length;
|
||||
int bitOffset = LENGTH_BITS;
|
||||
for (int i = 0; i <length ; i++) {
|
||||
key |= (3 << bitOffset);
|
||||
bitOffset += 2;
|
||||
}
|
||||
return key;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
*/
|
||||
|
||||
public interface Covariate {
|
||||
|
||||
/**
|
||||
* Initialize any member variables using the command-line arguments passed to the walker
|
||||
*
|
||||
|
|
@ -79,6 +80,13 @@ public interface Covariate {
|
|||
* @return a long representation of the object
|
||||
*/
|
||||
public int keyFromValue(final Object value);
|
||||
|
||||
/**
|
||||
* Returns the maximum value possible for any key representing this covariate
|
||||
*
|
||||
* @return the maximum value possible for any key representing this covariate
|
||||
*/
|
||||
public int maximumKeyValue();
|
||||
}
|
||||
|
||||
interface RequiredCovariate extends Covariate {}
|
||||
|
|
|
|||
|
|
@ -46,8 +46,11 @@ import java.util.EnumSet;
|
|||
*/
|
||||
|
||||
public class CycleCovariate implements StandardCovariate {
|
||||
private final static EnumSet<NGSPlatform> DISCRETE_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.ILLUMINA, NGSPlatform.SOLID, NGSPlatform.PACBIO, NGSPlatform.COMPLETE_GENOMICS);
|
||||
private final static EnumSet<NGSPlatform> FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT);
|
||||
|
||||
private static final int MAXIMUM_CYCLE_VALUE = 1000;
|
||||
|
||||
private static final EnumSet<NGSPlatform> DISCRETE_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.ILLUMINA, NGSPlatform.SOLID, NGSPlatform.PACBIO, NGSPlatform.COMPLETE_GENOMICS);
|
||||
private static final EnumSet<NGSPlatform> FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT);
|
||||
|
||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
@Override
|
||||
|
|
@ -202,6 +205,11 @@ public class CycleCovariate implements StandardCovariate {
|
|||
return (value instanceof String) ? keyFromCycle(Integer.parseInt((String) value)) : keyFromCycle((Integer) value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int maximumKeyValue() {
|
||||
return (MAXIMUM_CYCLE_VALUE << 1) + 1;
|
||||
}
|
||||
|
||||
private static int keyFromCycle(final int cycle) {
|
||||
// no negative values because values must fit into the first few bits of the long
|
||||
int result = Math.abs(cycle);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/*
|
||||
|
|
@ -67,4 +68,9 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
|||
public int keyFromValue(final Object value) {
|
||||
return (value instanceof String) ? (int)Byte.parseByte((String) value) : (int)(Byte) value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int maximumKeyValue() {
|
||||
return QualityUtils.MAX_QUAL_SCORE;
|
||||
}
|
||||
}
|
||||
|
|
@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.collections.IntegerIndexedNestedHashMap;
|
||||
import org.broadinstitute.sting.utils.recalibration.QualQuantizer;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
|
||||
|
|
@ -36,10 +36,10 @@ public class QuantizationInfo {
|
|||
for (int i = 0; i < qualHistogram.length; i++)
|
||||
qualHistogram[i] = 0L;
|
||||
|
||||
final NestedHashMap qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); // get the quality score table
|
||||
final IntegerIndexedNestedHashMap<RecalDatum> qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); // get the quality score table
|
||||
|
||||
for (final Object value : qualTable.getAllValues()) {
|
||||
final RecalDatum datum = (RecalDatum)value;
|
||||
for (final RecalDatum value : qualTable.getAllValues()) {
|
||||
final RecalDatum datum = value;
|
||||
final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL )
|
||||
qualHistogram[empiricalQual] += datum.numObservations; // add the number of observations for every key
|
||||
}
|
||||
|
|
|
|||
|
|
@ -83,6 +83,11 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
|||
return readGroupLookupTable.get(readGroupId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int maximumKeyValue() {
|
||||
return readGroupLookupTable.size() - 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* If the sample has a PU tag annotation, return that. If not, return the read group id.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
|||
import org.broadinstitute.sting.utils.R.RScriptExecutor;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.collections.IntegerIndexedNestedHashMap;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||
|
|
@ -211,19 +212,19 @@ public class RecalDataManager {
|
|||
|
||||
private static List<GATKReportTable> generateReportTables(final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) {
|
||||
List<GATKReportTable> result = new LinkedList<GATKReportTable>();
|
||||
int tableIndex = 0;
|
||||
int reportTableIndex = 0;
|
||||
|
||||
final Map<Covariate, String> covariateNameMap = new HashMap<Covariate, String>(requestedCovariates.length);
|
||||
for (final Covariate covariate : requestedCovariates)
|
||||
covariateNameMap.put(covariate, parseCovariateName(covariate));
|
||||
|
||||
for (final RecalibrationTables.TableType type : RecalibrationTables.TableType.values()) {
|
||||
for (int tableIndex = 0; tableIndex < recalibrationTables.numTables(); tableIndex++) {
|
||||
|
||||
final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
|
||||
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[0]), "%s")); // save the required covariate name so we can reference it in the future
|
||||
if (type != RecalibrationTables.TableType.READ_GROUP_TABLE) {
|
||||
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[1]), "%s")); // save the required covariate name so we can reference it in the future
|
||||
if (type == RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLE) {
|
||||
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.index) {
|
||||
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[1]), "%s")); // save the required covariate name so we can reference it in the future
|
||||
if (tableIndex == RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
|
||||
columnNames.add(covariateValue);
|
||||
columnNames.add(covariateName);
|
||||
}
|
||||
|
|
@ -231,41 +232,40 @@ public class RecalDataManager {
|
|||
|
||||
columnNames.add(eventType); // the order of these column names is important here
|
||||
columnNames.add(empiricalQuality);
|
||||
if (type == RecalibrationTables.TableType.READ_GROUP_TABLE)
|
||||
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
|
||||
columnNames.add(estimatedQReported); // only the read group table needs the estimated Q reported
|
||||
columnNames.add(nObservations);
|
||||
columnNames.add(nErrors);
|
||||
|
||||
final GATKReportTable reportTable = new GATKReportTable("RecalTable" + tableIndex++, "", columnNames.size());
|
||||
final GATKReportTable reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size());
|
||||
for (final Pair<String, String> columnName : columnNames)
|
||||
reportTable.addColumn(columnName.getFirst(), columnName.getSecond()); // every table must have the event type
|
||||
|
||||
int rowIndex = 0;
|
||||
|
||||
final NestedHashMap table = recalibrationTables.getTable(type);
|
||||
for (final NestedHashMap.Leaf row : table.getAllLeaves()) {
|
||||
final IntegerIndexedNestedHashMap<RecalDatum> table = recalibrationTables.getTable(tableIndex);
|
||||
for (final IntegerIndexedNestedHashMap.Leaf row : table.getAllLeaves()) {
|
||||
final RecalDatum datum = (RecalDatum)row.value;
|
||||
final List<Object> keys = row.keys;
|
||||
final int[] keys = row.keys;
|
||||
|
||||
int columnIndex = 0;
|
||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex).getFirst(), requestedCovariates[0].formatKey((Integer)keys.get(columnIndex++)));
|
||||
if (type != RecalibrationTables.TableType.READ_GROUP_TABLE) {
|
||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex).getFirst(), requestedCovariates[1].formatKey((Integer) keys.get(columnIndex++)));
|
||||
if (type == RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLE) {
|
||||
final int covariateIndex = (Integer)keys.get(columnIndex);
|
||||
final Covariate covariate = requestedCovariates[2 + covariateIndex];
|
||||
final int covariateKey = (Integer)keys.get(columnIndex+1);
|
||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex).getFirst(), requestedCovariates[0].formatKey(keys[columnIndex++]));
|
||||
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.index) {
|
||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex).getFirst(), requestedCovariates[1].formatKey(keys[columnIndex++]));
|
||||
if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
|
||||
final Covariate covariate = requestedCovariates[tableIndex];
|
||||
final int covariateKey = keys[columnIndex+1];
|
||||
|
||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), covariate.formatKey(covariateKey));
|
||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), covariateNameMap.get(covariate));
|
||||
}
|
||||
}
|
||||
|
||||
final EventType event = EventType.eventFrom((Integer)keys.get(columnIndex));
|
||||
final EventType event = EventType.eventFrom(keys[columnIndex]);
|
||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), event);
|
||||
|
||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality());
|
||||
if (type == RecalibrationTables.TableType.READ_GROUP_TABLE)
|
||||
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
|
||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
|
||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), datum.numObservations);
|
||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex).getFirst(), datum.numMismatches);
|
||||
|
|
@ -344,25 +344,31 @@ public class RecalDataManager {
|
|||
}
|
||||
|
||||
private static void writeCSV(final PrintStream deltaTableFile, final RecalibrationTables recalibrationTables, final String recalibrationMode, final Covariate[] requestedCovariates, final boolean printHeader) {
|
||||
|
||||
final NestedHashMap deltaTable = new NestedHashMap();
|
||||
|
||||
// add the quality score table to the delta table
|
||||
final NestedHashMap qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
|
||||
for (final NestedHashMap.Leaf leaf : qualTable.getAllLeaves()) { // go through every element in the covariates table to create the delta table
|
||||
final List<Object> newCovs = new ArrayList<Object>(4);
|
||||
newCovs.add(leaf.keys.get(0));
|
||||
newCovs.add(requestedCovariates.length); // replace the covariate name with an arbitrary (unused) index for QualityScore
|
||||
newCovs.add(leaf.keys.get(1));
|
||||
newCovs.add(leaf.keys.get(2));
|
||||
addToDeltaTable(deltaTable, newCovs.toArray(), (RecalDatum)leaf.value); // add this covariate to the delta table
|
||||
final IntegerIndexedNestedHashMap<RecalDatum> qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
|
||||
for (final IntegerIndexedNestedHashMap.Leaf leaf : qualTable.getAllLeaves()) { // go through every element in the covariates table to create the delta table
|
||||
final int[] newCovs = new int[4];
|
||||
newCovs[0] = leaf.keys[0];
|
||||
newCovs[1] = requestedCovariates.length; // replace the covariate name with an arbitrary (unused) index for QualityScore
|
||||
newCovs[2] = leaf.keys[1];
|
||||
newCovs[3] = leaf.keys[2];
|
||||
addToDeltaTable(deltaTable, newCovs, (RecalDatum)leaf.value); // add this covariate to the delta table
|
||||
}
|
||||
|
||||
// add the optional covariates to the delta table
|
||||
final NestedHashMap covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLE);
|
||||
for (final NestedHashMap.Leaf leaf : covTable.getAllLeaves()) {
|
||||
final List<Object> covs = new ArrayList<Object>(leaf.keys);
|
||||
covs.remove(1); // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS)
|
||||
addToDeltaTable(deltaTable, covs.toArray(), (RecalDatum)leaf.value); // add this covariate to the delta table
|
||||
for (int i = RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index; i < requestedCovariates.length; i++) {
|
||||
final IntegerIndexedNestedHashMap<RecalDatum> covTable = recalibrationTables.getTable(i);
|
||||
for (final IntegerIndexedNestedHashMap.Leaf leaf : covTable.getAllLeaves()) {
|
||||
final int[] covs = new int[leaf.keys.length-1];
|
||||
covs[0] = leaf.keys[0];
|
||||
covs[1] = i; // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS)
|
||||
covs[2] = leaf.keys[2];
|
||||
covs[3] = leaf.keys[3];
|
||||
addToDeltaTable(deltaTable, covs, (RecalDatum) leaf.value); // add this covariate to the delta table
|
||||
}
|
||||
}
|
||||
|
||||
// output the csv file
|
||||
|
|
@ -397,12 +403,9 @@ public class RecalDataManager {
|
|||
|
||||
private static List<Object> generateValuesFromKeys(final List<Object> keys, final Covariate[] covariates, final Map<Covariate, String> covariateNameMap) {
|
||||
final List<Object> values = new ArrayList<Object>(4);
|
||||
values.add(covariates[0].formatKey((Integer)keys.get(0)));
|
||||
|
||||
// TODO -- create static final variables to hold the indexes of the RG, qual, cov ID, etc.
|
||||
|
||||
values.add(covariates[RecalibrationTables.TableType.READ_GROUP_TABLE.index].formatKey((Integer)keys.get(0)));
|
||||
final int covariateIndex = (Integer)keys.get(1);
|
||||
final Covariate covariate = covariateIndex == covariates.length ? covariates[1] : covariates[2 + covariateIndex];
|
||||
final Covariate covariate = covariateIndex == covariates.length ? covariates[RecalibrationTables.TableType.QUALITY_SCORE_TABLE.index] : covariates[covariateIndex];
|
||||
final int covariateKey = (Integer)keys.get(2);
|
||||
values.add(covariate.formatKey(covariateKey));
|
||||
values.add(covariateNameMap.get(covariate));
|
||||
|
|
@ -422,14 +425,22 @@ public class RecalDataManager {
|
|||
* @param deltaKey the key to the table
|
||||
* @param recalDatum the recal datum to combine with the accuracyDatum element in the table
|
||||
*/
|
||||
private static void addToDeltaTable(final NestedHashMap deltaTable, final Object[] deltaKey, final RecalDatum recalDatum) {
|
||||
final RecalDatum deltaDatum = (RecalDatum)deltaTable.get(deltaKey); // check if we already have a RecalDatum for this key
|
||||
private static void addToDeltaTable(final NestedHashMap deltaTable, final int[] deltaKey, final RecalDatum recalDatum) {
|
||||
Object[] wrappedKey = wrapKeys(deltaKey);
|
||||
final RecalDatum deltaDatum = (RecalDatum)deltaTable.get(wrappedKey); // check if we already have a RecalDatum for this key
|
||||
if (deltaDatum == null)
|
||||
deltaTable.put(new RecalDatum(recalDatum), deltaKey); // if we don't have a key yet, create a new one with the same values as the curent datum
|
||||
deltaTable.put(new RecalDatum(recalDatum), wrappedKey); // if we don't have a key yet, create a new one with the same values as the curent datum
|
||||
else
|
||||
deltaDatum.combine(recalDatum); // if we do have a datum, combine it with this one.
|
||||
}
|
||||
|
||||
private static Object[] wrapKeys(final int[] keys) {
|
||||
final Object[] wrappedKeys = new Object[keys.length];
|
||||
for (int i = 0; i < keys.length; i++)
|
||||
wrappedKeys[i] = keys[i];
|
||||
return wrappedKeys;
|
||||
}
|
||||
|
||||
/**
|
||||
* Section of code shared between the two recalibration walkers which uses the command line arguments to adjust attributes of the read such as quals or platform string
|
||||
*
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.collections.IntegerIndexedNestedHashMap;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
|
||||
|
|
@ -26,9 +26,9 @@ public class RecalibrationReport {
|
|||
private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
|
||||
private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter
|
||||
|
||||
private final Object[] tempRGarray = new Object[2];
|
||||
private final Object[] tempQUALarray = new Object[3];
|
||||
private final Object[] tempCOVarray = new Object[5];
|
||||
private final int[] tempRGarray = new int[2];
|
||||
private final int[] tempQUALarray = new int[3];
|
||||
private final int[] tempCOVarray = new int[4];
|
||||
|
||||
public RecalibrationReport(final File RECAL_FILE) {
|
||||
final GATKReport report = new GATKReport(RECAL_FILE);
|
||||
|
|
@ -57,16 +57,15 @@ public class RecalibrationReport {
|
|||
for (Covariate cov : requestedCovariates)
|
||||
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
|
||||
|
||||
final GATKReportTable rgReportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE);
|
||||
final NestedHashMap rgTable = parseReadGroupTable(rgReportTable);
|
||||
// TODO -- note that we might be able to save memory (esp. for sparse tables) by making one pass through the GATK report to see the maximum values for each covariate and using that in the constructor here
|
||||
recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE)));
|
||||
|
||||
final GATKReportTable qualReportTable = report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE);
|
||||
final NestedHashMap qualTable = parseQualityScoreTable(qualReportTable);
|
||||
parseReadGroupTable(report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE));
|
||||
|
||||
final GATKReportTable covReportTable = report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE);
|
||||
final NestedHashMap covTable = parseAllCovariatesTable(covReportTable);
|
||||
parseQualityScoreTable(report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE));
|
||||
|
||||
parseAllCovariatesTable(report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE), recalibrationTables);
|
||||
|
||||
recalibrationTables = new RecalibrationTables(rgTable, qualTable, covTable);
|
||||
}
|
||||
|
||||
protected RecalibrationReport(final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final GATKReportTable argumentTable, final RecalibrationArgumentCollection RAC) {
|
||||
|
|
@ -78,6 +77,19 @@ public class RecalibrationReport {
|
|||
this.optionalCovariateIndexes = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Counts the number of unique read groups in the table
|
||||
*
|
||||
* @param reportTable the GATKReport table containing data for this table
|
||||
* @return the number of unique read groups
|
||||
*/
|
||||
private int countReadGroups(final GATKReportTable reportTable) {
|
||||
Set<String> readGroups = new HashSet<String>();
|
||||
for ( int i = 0; i < reportTable.getNumRows(); i++ )
|
||||
readGroups.add(reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME).toString());
|
||||
return readGroups.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Combines two recalibration reports by adding all observations and errors
|
||||
*
|
||||
|
|
@ -94,14 +106,14 @@ public class RecalibrationReport {
|
|||
public void combine(final RecalibrationReport other) {
|
||||
|
||||
for (RecalibrationTables.TableType type : RecalibrationTables.TableType.values()) {
|
||||
final NestedHashMap myTable = recalibrationTables.getTable(type);
|
||||
final NestedHashMap otherTable = other.recalibrationTables.getTable(type);
|
||||
final IntegerIndexedNestedHashMap<RecalDatum> myTable = recalibrationTables.getTable(type);
|
||||
final IntegerIndexedNestedHashMap<RecalDatum> otherTable = other.recalibrationTables.getTable(type);
|
||||
|
||||
for (final NestedHashMap.Leaf row : otherTable.getAllLeaves()) {
|
||||
final RecalDatum myDatum = (RecalDatum)myTable.get(row.keys);
|
||||
for (final IntegerIndexedNestedHashMap.Leaf row : otherTable.getAllLeaves()) {
|
||||
final RecalDatum myDatum = myTable.get(row.keys);
|
||||
|
||||
if (myDatum == null)
|
||||
myTable.put(row.value, row.keys);
|
||||
myTable.put((RecalDatum)row.value, row.keys);
|
||||
else
|
||||
myDatum.combine((RecalDatum)row.value);
|
||||
}
|
||||
|
|
@ -124,39 +136,34 @@ public class RecalibrationReport {
|
|||
* Compiles the list of keys for the Covariates table and uses the shared parsing utility to produce the actual table
|
||||
*
|
||||
* @param reportTable the GATKReport table containing data for this table
|
||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
||||
*/
|
||||
private NestedHashMap parseAllCovariatesTable(final GATKReportTable reportTable) {
|
||||
final NestedHashMap result = new NestedHashMap();
|
||||
|
||||
* @param recalibrationTables the recalibration tables
|
||||
\ */
|
||||
private void parseAllCovariatesTable(final GATKReportTable reportTable, final RecalibrationTables recalibrationTables) {
|
||||
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
|
||||
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
|
||||
tempCOVarray[0] = requestedCovariates[0].keyFromValue(rg);
|
||||
final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
|
||||
tempCOVarray[1] = requestedCovariates[1].keyFromValue(qual);
|
||||
|
||||
final String covName = (String)reportTable.get(i, RecalDataManager.COVARIATE_NAME_COLUMN_NAME);
|
||||
final int covIndex = optionalCovariateIndexes.get(covName);
|
||||
tempCOVarray[2] = covIndex;
|
||||
final Object covValue = reportTable.get(i, RecalDataManager.COVARIATE_VALUE_COLUMN_NAME);
|
||||
tempCOVarray[3] = requestedCovariates[covIndex + 2].keyFromValue(covValue);
|
||||
tempCOVarray[2] = requestedCovariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex].keyFromValue(covValue);
|
||||
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
|
||||
tempCOVarray[4] = event.index;
|
||||
tempCOVarray[3] = event.index;
|
||||
|
||||
result.put(getRecalDatum(reportTable, i, false), tempCOVarray);
|
||||
recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex).put(getRecalDatum(reportTable, i, false), tempCOVarray);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Compiles the list of keys for the QualityScore table and uses the shared parsing utility to produce the actual table
|
||||
* @param reportTable the GATKReport table containing data for this table
|
||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
||||
* @param qualTable the map representing this table
|
||||
*/
|
||||
private NestedHashMap parseQualityScoreTable(final GATKReportTable reportTable) {
|
||||
final NestedHashMap result = new NestedHashMap();
|
||||
|
||||
private void parseQualityScoreTable(final GATKReportTable reportTable, final IntegerIndexedNestedHashMap<RecalDatum> qualTable) {
|
||||
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
|
||||
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
|
||||
tempQUALarray[0] = requestedCovariates[0].keyFromValue(rg);
|
||||
|
|
@ -165,31 +172,25 @@ public class RecalibrationReport {
|
|||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
|
||||
tempQUALarray[2] = event.index;
|
||||
|
||||
result.put(getRecalDatum(reportTable, i, false), tempQUALarray);
|
||||
qualTable.put(getRecalDatum(reportTable, i, false), tempQUALarray);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compiles the list of keys for the ReadGroup table and uses the shared parsing utility to produce the actual table
|
||||
*
|
||||
* @param reportTable the GATKReport table containing data for this table
|
||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
||||
* @param rgTable the map representing this table
|
||||
*/
|
||||
private NestedHashMap parseReadGroupTable(final GATKReportTable reportTable) {
|
||||
final NestedHashMap result = new NestedHashMap();
|
||||
|
||||
private void parseReadGroupTable(final GATKReportTable reportTable, final IntegerIndexedNestedHashMap<RecalDatum> rgTable) {
|
||||
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
|
||||
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
|
||||
tempRGarray[0] = requestedCovariates[0].keyFromValue(rg);
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
|
||||
tempRGarray[1] = event.index;
|
||||
|
||||
result.put(getRecalDatum(reportTable, i, true), tempRGarray);
|
||||
rgTable.put(getRecalDatum(reportTable, i, true), tempRGarray);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private RecalDatum getRecalDatum(final GATKReportTable reportTable, final int row, final boolean hasEstimatedQReportedColumn) {
|
||||
|
|
@ -298,7 +299,7 @@ public class RecalibrationReport {
|
|||
*/
|
||||
public void calculateEmpiricalAndQuantizedQualities() {
|
||||
for (RecalibrationTables.TableType type : RecalibrationTables.TableType.values()) {
|
||||
final NestedHashMap table = recalibrationTables.getTable(type);
|
||||
final IntegerIndexedNestedHashMap table = recalibrationTables.getTable(type);
|
||||
for (final Object value : table.getAllValues()) {
|
||||
((RecalDatum)value).calcCombinedEmpiricalQuality();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,117 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.collections;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: ebanks
|
||||
* Date: July 1, 2012
|
||||
*/
|
||||
|
||||
public class IntegerIndexedNestedHashMap<T> {
|
||||
|
||||
protected final Object[] data;
|
||||
|
||||
protected final int numDimensions;
|
||||
protected final int[] indexFactors;
|
||||
|
||||
public IntegerIndexedNestedHashMap(final int... dimensions) {
|
||||
numDimensions = dimensions.length;
|
||||
if ( numDimensions == 0 )
|
||||
throw new ReviewedStingException("There must be at least one dimension to an IntegerIndexedNestedHashMap");
|
||||
|
||||
indexFactors = new int[numDimensions];
|
||||
indexFactors[0] = 1;
|
||||
int totalSize = dimensions[0];
|
||||
for ( int i = 1; i < numDimensions; i++ ) {
|
||||
indexFactors[i] = indexFactors[i-1] * dimensions[i-1];
|
||||
totalSize *= dimensions[i];
|
||||
}
|
||||
data = new Object[totalSize];
|
||||
}
|
||||
|
||||
public T get(final int... keys) {
|
||||
return (T)data[calculateIndex(keys)];
|
||||
}
|
||||
|
||||
public synchronized void put(final T value, final int... keys) { // WARNING! value comes before the keys!
|
||||
data[calculateIndex(keys)] = value;
|
||||
}
|
||||
|
||||
protected int calculateIndex(final int... keys) {
|
||||
int index = keys[0];
|
||||
for ( int i = 1; i < numDimensions; i++ )
|
||||
index += keys[i] * indexFactors[i];
|
||||
return index;
|
||||
}
|
||||
|
||||
public List<T> getAllValues() {
|
||||
final int dataLength = data.length;
|
||||
final List<T> result = new ArrayList<T>(dataLength);
|
||||
for ( int i = 0; i < dataLength; i++ ) {
|
||||
final Object value = data[i];
|
||||
if ( value != null )
|
||||
result.add((T)value);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public static class Leaf {
|
||||
public final int[] keys;
|
||||
public final Object value;
|
||||
|
||||
public Leaf(final int[] keys, final Object value) {
|
||||
this.keys = keys;
|
||||
this.value = value;
|
||||
}
|
||||
}
|
||||
|
||||
public List<Leaf> getAllLeaves() {
|
||||
final int dataLength = data.length;
|
||||
final List<Leaf> result = new ArrayList<Leaf>(dataLength);
|
||||
for ( int i = 0; i < dataLength; i++ ) {
|
||||
final Object value = data[i];
|
||||
if ( value != null )
|
||||
result.add(new Leaf(reverseEngineerIndex(i), value));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private int[] reverseEngineerIndex(int index) {
|
||||
final int[] keys = new int[numDimensions];
|
||||
for ( int i = numDimensions - 1; i >= 0; i-- ) {
|
||||
final int key = index / indexFactors[i];
|
||||
index -= (key * indexFactors[i]);
|
||||
keys[i] = key;
|
||||
}
|
||||
return keys;
|
||||
}
|
||||
}
|
||||
|
|
@ -42,17 +42,12 @@ public class NestedHashMap {
|
|||
|
||||
public Object get( final Object... keys ) {
|
||||
Map map = this.data;
|
||||
final int keysLength = keys.length;
|
||||
for( int iii = 0; iii < keysLength; iii++ ) {
|
||||
if( iii == keysLength - 1 ) {
|
||||
return map.get(keys[iii]);
|
||||
} else {
|
||||
map = (Map) map.get(keys[iii]);
|
||||
if( map == null ) { return null; }
|
||||
}
|
||||
final int nestedMaps = keys.length - 1;
|
||||
for( int iii = 0; iii < nestedMaps; iii++ ) {
|
||||
map = (Map) map.get(keys[iii]);
|
||||
if( map == null ) { return null; }
|
||||
}
|
||||
|
||||
return null;
|
||||
return map.get(keys[nestedMaps]);
|
||||
}
|
||||
|
||||
public synchronized void put( final Object value, final Object... keys ) { // WARNING! value comes before the keys!
|
||||
|
|
@ -87,7 +82,7 @@ public class NestedHashMap {
|
|||
}
|
||||
|
||||
public List<Object> getAllValues() {
|
||||
List<Object> result = new ArrayList<Object>();
|
||||
final List<Object> result = new ArrayList<Object>();
|
||||
fillAllValues(data, result);
|
||||
return result;
|
||||
}
|
||||
|
|
@ -114,8 +109,8 @@ public class NestedHashMap {
|
|||
}
|
||||
|
||||
public List<Leaf> getAllLeaves() {
|
||||
List<Leaf> result = new ArrayList<Leaf>();
|
||||
List<Object> path = new ArrayList<Object>();
|
||||
final List<Leaf> result = new ArrayList<Leaf>();
|
||||
final List<Object> path = new ArrayList<Object>();
|
||||
fillAllLeaves(data, path, result);
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.recalibration;
|
|||
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.IntegerIndexedNestedHashMap;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
@ -48,8 +49,6 @@ public class BaseRecalibration {
|
|||
private final RecalibrationTables recalibrationTables;
|
||||
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
||||
|
||||
private final Object[] tempKeySet;
|
||||
|
||||
private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
||||
static {
|
||||
for (int i = 0; i < EventType.values().length; i++)
|
||||
|
|
@ -74,7 +73,6 @@ public class BaseRecalibration {
|
|||
quantizationInfo.quantizeQualityScores(quantizationLevels);
|
||||
|
||||
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||
tempKeySet = new Integer[requestedCovariates.length];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -89,7 +87,6 @@ public class BaseRecalibration {
|
|||
this.recalibrationTables = recalibrationTables;
|
||||
this.requestedCovariates = requestedCovariates;
|
||||
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||
tempKeySet = new Integer[requestedCovariates.length];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -112,13 +109,13 @@ public class BaseRecalibration {
|
|||
|
||||
if (originalQualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
||||
final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model
|
||||
//final Object[] wrappedKeySet = wrapKeySet(keySet);
|
||||
//Byte recalibratedQualityScore = (Byte) qualityScoreByFullCovariateKey[errorModel.index].get(wrappedKeySet);
|
||||
Byte recalibratedQualityScore = null;
|
||||
if (recalibratedQualityScore == null) {
|
||||
recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
|
||||
//qualityScoreByFullCovariateKey[errorModel.index].put(recalibratedQualityScore, wrappedKeySet);
|
||||
}
|
||||
final Byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
|
||||
//Byte recalibratedQualityScore = (Byte) qualityScoreByFullCovariateKey[errorModel.index].get(keySet);
|
||||
//Byte recalibratedQualityScore = null;
|
||||
//if (recalibratedQualityScore == null) {
|
||||
// recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
|
||||
// qualityScoreByFullCovariateKey[errorModel.index].put(recalibratedQualityScore, keySet);
|
||||
//}
|
||||
quals[offset] = recalibratedQualityScore;
|
||||
}
|
||||
}
|
||||
|
|
@ -126,12 +123,6 @@ public class BaseRecalibration {
|
|||
}
|
||||
}
|
||||
|
||||
private Object[] wrapKeySet(final int[] keySet) {
|
||||
for (int i = 0; i < keySet.length; i++)
|
||||
tempKeySet[i] = keySet[i];
|
||||
return tempKeySet;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements a serial recalibration of the reads using the combinational table.
|
||||
* First, we perform a positional recalibration, and then a subsequent dinuc correction.
|
||||
|
|
@ -154,7 +145,7 @@ public class BaseRecalibration {
|
|||
final byte qualFromRead = (byte)(long)key[1];
|
||||
final double globalDeltaQ = calculateGlobalDeltaQ(recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE), key, errorModel);
|
||||
final double deltaQReported = calculateDeltaQReported(recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE), key, errorModel, globalDeltaQ, qualFromRead);
|
||||
final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLE), key, errorModel, globalDeltaQ, deltaQReported, qualFromRead);
|
||||
final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables, key, errorModel, globalDeltaQ, deltaQReported, qualFromRead);
|
||||
|
||||
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
|
||||
recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
|
||||
|
|
@ -162,10 +153,10 @@ public class BaseRecalibration {
|
|||
return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
|
||||
}
|
||||
|
||||
private double calculateGlobalDeltaQ(final NestedHashMap table, final int[] key, final EventType errorModel) {
|
||||
private double calculateGlobalDeltaQ(final IntegerIndexedNestedHashMap<RecalDatum> table, final int[] key, final EventType errorModel) {
|
||||
double result = 0.0;
|
||||
|
||||
final RecalDatum empiricalQualRG = (RecalDatum)table.get(key[0], errorModel.index);
|
||||
final RecalDatum empiricalQualRG = table.get(key[0], errorModel.index);
|
||||
if (empiricalQualRG != null) {
|
||||
final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality();
|
||||
final double aggregrateQReported = empiricalQualRG.getEstimatedQReported();
|
||||
|
|
@ -175,10 +166,10 @@ public class BaseRecalibration {
|
|||
return result;
|
||||
}
|
||||
|
||||
private double calculateDeltaQReported(final NestedHashMap table, final int[] key, final EventType errorModel, final double globalDeltaQ, final byte qualFromRead) {
|
||||
private double calculateDeltaQReported(final IntegerIndexedNestedHashMap<RecalDatum> table, final int[] key, final EventType errorModel, final double globalDeltaQ, final byte qualFromRead) {
|
||||
double result = 0.0;
|
||||
|
||||
final RecalDatum empiricalQualQS = (RecalDatum)table.get(key[0], key[1], errorModel.index);
|
||||
final RecalDatum empiricalQualQS = table.get(key[0], key[1], errorModel.index);
|
||||
if (empiricalQualQS != null) {
|
||||
final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality();
|
||||
result = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
|
||||
|
|
@ -187,14 +178,15 @@ public class BaseRecalibration {
|
|||
return result;
|
||||
}
|
||||
|
||||
private double calculateDeltaQCovariates(final NestedHashMap table, final int[] key, final EventType errorModel, final double globalDeltaQ, final double deltaQReported, final byte qualFromRead) {
|
||||
private double calculateDeltaQCovariates(final RecalibrationTables recalibrationTables, final int[] key, final EventType errorModel, final double globalDeltaQ, final double deltaQReported, final byte qualFromRead) {
|
||||
double result = 0.0;
|
||||
|
||||
// for all optional covariates
|
||||
for (int i = 2; i < requestedCovariates.length; i++) {
|
||||
if (key[i] < 0)
|
||||
continue;
|
||||
final RecalDatum empiricalQualCO = (RecalDatum)table.get(key[0], key[1], (i-2), key[i], errorModel.index);
|
||||
|
||||
final RecalDatum empiricalQualCO = recalibrationTables.getTable(i).get(key[0], key[1], key[i], errorModel.index);
|
||||
if (empiricalQualCO != null) {
|
||||
final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
|
||||
result += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
|
||||
|
|
|
|||
|
|
@ -25,7 +25,10 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.Covariate;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDatum;
|
||||
import org.broadinstitute.sting.utils.collections.IntegerIndexedNestedHashMap;
|
||||
|
||||
/**
|
||||
* Utility class to facilitate on-the-fly base quality score recalibration.
|
||||
|
|
@ -39,24 +42,42 @@ public class RecalibrationTables {
|
|||
public enum TableType {
|
||||
READ_GROUP_TABLE(0),
|
||||
QUALITY_SCORE_TABLE(1),
|
||||
OPTIONAL_COVARIATE_TABLE(2);
|
||||
OPTIONAL_COVARIATE_TABLES_START(2);
|
||||
|
||||
private final int index;
|
||||
public final int index;
|
||||
|
||||
private TableType(final int index) {
|
||||
this.index = index;
|
||||
}
|
||||
}
|
||||
|
||||
private final NestedHashMap[] tables = new NestedHashMap[TableType.values().length];
|
||||
private final IntegerIndexedNestedHashMap[] tables;
|
||||
|
||||
public RecalibrationTables(final NestedHashMap rgMap, final NestedHashMap qualMap, final NestedHashMap covMap) {
|
||||
tables[TableType.READ_GROUP_TABLE.index] = rgMap;
|
||||
tables[TableType.QUALITY_SCORE_TABLE.index] = qualMap;
|
||||
tables[TableType.OPTIONAL_COVARIATE_TABLE.index] = covMap;
|
||||
public RecalibrationTables(final Covariate[] covariates) {
|
||||
this(covariates, covariates[TableType.READ_GROUP_TABLE.index].maximumKeyValue() + 1);
|
||||
}
|
||||
|
||||
public NestedHashMap getTable(final TableType type) {
|
||||
return tables[type.index];
|
||||
public RecalibrationTables(final Covariate[] covariates, final int numReadGroups) {
|
||||
tables = new IntegerIndexedNestedHashMap[covariates.length];
|
||||
|
||||
final int qualDimension = covariates[TableType.QUALITY_SCORE_TABLE.index].maximumKeyValue() + 1;
|
||||
final int eventDimension = EventType.values().length;
|
||||
|
||||
tables[TableType.READ_GROUP_TABLE.index] = new IntegerIndexedNestedHashMap<RecalDatum>(numReadGroups, eventDimension);
|
||||
tables[TableType.QUALITY_SCORE_TABLE.index] = new IntegerIndexedNestedHashMap<RecalDatum>(numReadGroups, qualDimension, eventDimension);
|
||||
for (int i = TableType.OPTIONAL_COVARIATE_TABLES_START.index; i < covariates.length; i++)
|
||||
tables[i] = new IntegerIndexedNestedHashMap<RecalDatum>(numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension);
|
||||
}
|
||||
|
||||
public IntegerIndexedNestedHashMap<RecalDatum> getTable(final TableType type) {
|
||||
return (IntegerIndexedNestedHashMap<RecalDatum>)tables[type.index];
|
||||
}
|
||||
|
||||
public IntegerIndexedNestedHashMap<RecalDatum> getTable(final int index) {
|
||||
return (IntegerIndexedNestedHashMap<RecalDatum>)tables[index];
|
||||
}
|
||||
|
||||
public int numTables() {
|
||||
return tables.length;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.collections.IntegerIndexedNestedHashMap;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
|
|
@ -74,9 +74,9 @@ public class RecalibrationReportUnitTest {
|
|||
int nKeys = 0; // keep track of how many keys were produced
|
||||
final ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates);
|
||||
|
||||
final NestedHashMap rgTable = new NestedHashMap();
|
||||
final NestedHashMap qualTable = new NestedHashMap();
|
||||
final NestedHashMap covTable = new NestedHashMap();
|
||||
final RecalibrationTables recalibrationTables = new RecalibrationTables(requestedCovariates);
|
||||
final IntegerIndexedNestedHashMap<RecalDatum> rgTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
||||
final IntegerIndexedNestedHashMap<RecalDatum> qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
|
||||
|
||||
for (int offset = 0; offset < length; offset++) {
|
||||
|
||||
|
|
@ -89,15 +89,14 @@ public class RecalibrationReportUnitTest {
|
|||
qualTable.put(RecalDatum.createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.index);
|
||||
nKeys += 2;
|
||||
for (int j = 0; j < optionalCovariates.size(); j++) {
|
||||
covTable.put(RecalDatum.createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], j, covariates[2 + j], errorMode.index);
|
||||
final IntegerIndexedNestedHashMap<RecalDatum> covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j);
|
||||
covTable.put(RecalDatum.createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], j, covariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j], errorMode.index);
|
||||
nKeys++;
|
||||
}
|
||||
}
|
||||
}
|
||||
Assert.assertEquals(nKeys, expectedKeys);
|
||||
|
||||
final RecalibrationTables recalibrationTables = new RecalibrationTables(rgTable, qualTable, covTable);
|
||||
|
||||
final RecalibrationReport report = new RecalibrationReport(quantizationInfo, recalibrationTables, RAC.generateReportTable(), RAC);
|
||||
|
||||
File output = new File("RecalibrationReportUnitTestOutuput.grp");
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ package org.broadinstitute.sting.utils.recalibration;
|
|||
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.collections.IntegerIndexedNestedHashMap;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
@ -21,7 +21,6 @@ import java.util.*;
|
|||
public class BaseRecalibrationUnitTest {
|
||||
|
||||
private org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager dataManager;
|
||||
private RecalibrationTables recalibrationTables;
|
||||
|
||||
private ReadGroupCovariate rgCovariate;
|
||||
private QualityScoreCovariate qsCovariate;
|
||||
|
|
@ -74,9 +73,9 @@ public class BaseRecalibrationUnitTest {
|
|||
|
||||
readCovariates = RecalDataManager.computeCovariates(read, requestedCovariates);
|
||||
|
||||
final NestedHashMap rgTable = new NestedHashMap();
|
||||
final NestedHashMap qualTable = new NestedHashMap();
|
||||
final NestedHashMap covTable = new NestedHashMap();
|
||||
RecalibrationTables recalibrationTables = new RecalibrationTables(requestedCovariates);
|
||||
final IntegerIndexedNestedHashMap<RecalDatum> rgTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
||||
final IntegerIndexedNestedHashMap<RecalDatum> qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
|
||||
|
||||
for (int i=0; i<read.getReadLength(); i++) {
|
||||
final int[] bitKeys = readCovariates.getMismatchesKeySet(i);
|
||||
|
|
@ -96,12 +95,11 @@ public class BaseRecalibrationUnitTest {
|
|||
rgTable.put(newDatum, bitKeys[0], EventType.BASE_SUBSTITUTION.index);
|
||||
qualTable.put(newDatum, bitKeys[0], bitKeys[1], EventType.BASE_SUBSTITUTION.index);
|
||||
for (int j = 0; j < optionalCovariates.size(); j++) {
|
||||
covTable.put(newDatum, bitKeys[0], bitKeys[1], j, bitKeys[2 + j], EventType.BASE_SUBSTITUTION.index);
|
||||
final IntegerIndexedNestedHashMap<RecalDatum> covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j);
|
||||
covTable.put(newDatum, bitKeys[0], bitKeys[1], j, bitKeys[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j], EventType.BASE_SUBSTITUTION.index);
|
||||
}
|
||||
}
|
||||
|
||||
recalibrationTables = new RecalibrationTables(rgTable, qualTable, covTable);
|
||||
|
||||
dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||
|
||||
List<Byte> quantizedQuals = new ArrayList<Byte>();
|
||||
|
|
|
|||
Loading…
Reference in New Issue