Initial version of int indexed mapping for BQSR. Will be cleaned up in a bit.
This commit is contained in:
parent
c94c8a9c09
commit
cac72bce91
|
|
@ -664,12 +664,8 @@ public class SAMDataSource {
|
||||||
IndexedFastaSequenceFile refReader,
|
IndexedFastaSequenceFile refReader,
|
||||||
BaseRecalibration bqsrApplier,
|
BaseRecalibration bqsrApplier,
|
||||||
byte defaultBaseQualities) {
|
byte defaultBaseQualities) {
|
||||||
if (useOriginalBaseQualities || defaultBaseQualities >= 0)
|
// **** NOTE: ALL FILTERING SHOULD BE DONE BEFORE ANY ITERATORS THAT MODIFY THE READS! ****
|
||||||
// only wrap if we are replacing the original qualities or using a default base quality
|
// (otherwise we will process something that we may end up throwing away)
|
||||||
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities);
|
|
||||||
|
|
||||||
// NOTE: this (and other filtering) should be done before on-the-fly sorting
|
|
||||||
// as there is no reason to sort something that we will end of throwing away
|
|
||||||
if (downsamplingFraction != null)
|
if (downsamplingFraction != null)
|
||||||
wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction);
|
wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction);
|
||||||
|
|
||||||
|
|
@ -678,14 +674,18 @@ public class SAMDataSource {
|
||||||
if (!noValidationOfReadOrder && enableVerification)
|
if (!noValidationOfReadOrder && enableVerification)
|
||||||
wrappedIterator = new VerifyingSamIterator(genomeLocParser,wrappedIterator);
|
wrappedIterator = new VerifyingSamIterator(genomeLocParser,wrappedIterator);
|
||||||
|
|
||||||
|
wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));
|
||||||
|
|
||||||
|
if (useOriginalBaseQualities || defaultBaseQualities >= 0)
|
||||||
|
// only wrap if we are replacing the original qualities or using a default base quality
|
||||||
|
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities);
|
||||||
|
|
||||||
if (bqsrApplier != null)
|
if (bqsrApplier != null)
|
||||||
wrappedIterator = new BQSRSamIterator(wrappedIterator, bqsrApplier);
|
wrappedIterator = new BQSRSamIterator(wrappedIterator, bqsrApplier);
|
||||||
|
|
||||||
if (cmode != BAQ.CalculationMode.OFF)
|
if (cmode != BAQ.CalculationMode.OFF)
|
||||||
wrappedIterator = new BAQSamIterator(refReader, wrappedIterator, cmode, qmode);
|
wrappedIterator = new BAQSamIterator(refReader, wrappedIterator, cmode, qmode);
|
||||||
|
|
||||||
wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));
|
|
||||||
|
|
||||||
return wrappedIterator;
|
return wrappedIterator;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -204,7 +204,7 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
private static int keyFromContext(final byte[] dna, final int start, final int end) {
|
private static int keyFromContext(final byte[] dna, final int start, final int end) {
|
||||||
|
|
||||||
int key = end - start;
|
int key = end - start;
|
||||||
int bitOffset = 4;
|
int bitOffset = LENGTH_BITS;
|
||||||
for (int i = start; i < end; i++) {
|
for (int i = start; i < end; i++) {
|
||||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(dna[i]);
|
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(dna[i]);
|
||||||
if (baseIndex == -1) // ignore non-ACGT bases
|
if (baseIndex == -1) // ignore non-ACGT bases
|
||||||
|
|
@ -227,7 +227,7 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
|
|
||||||
final int length = key & LENGTH_MASK; // the first bits represent the length (in bp) of the context
|
final int length = key & LENGTH_MASK; // the first bits represent the length (in bp) of the context
|
||||||
int mask = 48; // use the mask to pull out bases
|
int mask = 48; // use the mask to pull out bases
|
||||||
int offset = 4;
|
int offset = LENGTH_BITS;
|
||||||
|
|
||||||
StringBuilder dna = new StringBuilder();
|
StringBuilder dna = new StringBuilder();
|
||||||
for (int i = 0; i < length; i++) {
|
for (int i = 0; i < length; i++) {
|
||||||
|
|
@ -239,4 +239,17 @@ public class ContextCovariate implements StandardCovariate {
|
||||||
|
|
||||||
return dna.toString();
|
return dna.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int maximumKeyValue() {
|
||||||
|
// the maximum value is T (11 in binary) for each base in the context
|
||||||
|
int length = Math.max(mismatchesContextSize, indelsContextSize); // the length of the context
|
||||||
|
int key = length;
|
||||||
|
int bitOffset = LENGTH_BITS;
|
||||||
|
for (int i = 0; i <length ; i++) {
|
||||||
|
key |= (3 << bitOffset);
|
||||||
|
bitOffset += 2;
|
||||||
|
}
|
||||||
|
return key;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public interface Covariate {
|
public interface Covariate {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialize any member variables using the command-line arguments passed to the walker
|
* Initialize any member variables using the command-line arguments passed to the walker
|
||||||
*
|
*
|
||||||
|
|
@ -79,6 +80,13 @@ public interface Covariate {
|
||||||
* @return a long representation of the object
|
* @return a long representation of the object
|
||||||
*/
|
*/
|
||||||
public int keyFromValue(final Object value);
|
public int keyFromValue(final Object value);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the maximum value possible for any key representing this covariate
|
||||||
|
*
|
||||||
|
* @return the maximum value possible for any key representing this covariate
|
||||||
|
*/
|
||||||
|
public int maximumKeyValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RequiredCovariate extends Covariate {}
|
interface RequiredCovariate extends Covariate {}
|
||||||
|
|
|
||||||
|
|
@ -46,8 +46,11 @@ import java.util.EnumSet;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class CycleCovariate implements StandardCovariate {
|
public class CycleCovariate implements StandardCovariate {
|
||||||
private final static EnumSet<NGSPlatform> DISCRETE_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.ILLUMINA, NGSPlatform.SOLID, NGSPlatform.PACBIO, NGSPlatform.COMPLETE_GENOMICS);
|
|
||||||
private final static EnumSet<NGSPlatform> FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT);
|
private static final int MAXIMUM_CYCLE_VALUE = 1000;
|
||||||
|
|
||||||
|
private static final EnumSet<NGSPlatform> DISCRETE_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.ILLUMINA, NGSPlatform.SOLID, NGSPlatform.PACBIO, NGSPlatform.COMPLETE_GENOMICS);
|
||||||
|
private static final EnumSet<NGSPlatform> FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT);
|
||||||
|
|
||||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -202,6 +205,11 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
return (value instanceof String) ? keyFromCycle(Integer.parseInt((String) value)) : keyFromCycle((Integer) value);
|
return (value instanceof String) ? keyFromCycle(Integer.parseInt((String) value)) : keyFromCycle((Integer) value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int maximumKeyValue() {
|
||||||
|
return (MAXIMUM_CYCLE_VALUE << 1) + 1;
|
||||||
|
}
|
||||||
|
|
||||||
private static int keyFromCycle(final int cycle) {
|
private static int keyFromCycle(final int cycle) {
|
||||||
// no negative values because values must fit into the first few bits of the long
|
// no negative values because values must fit into the first few bits of the long
|
||||||
int result = Math.abs(cycle);
|
int result = Math.abs(cycle);
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -67,4 +68,9 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
||||||
public int keyFromValue(final Object value) {
|
public int keyFromValue(final Object value) {
|
||||||
return (value instanceof String) ? (int)Byte.parseByte((String) value) : (int)(Byte) value;
|
return (value instanceof String) ? (int)Byte.parseByte((String) value) : (int)(Byte) value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int maximumKeyValue() {
|
||||||
|
return QualityUtils.MAX_QUAL_SCORE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
import org.broadinstitute.sting.utils.collections.IntegerIndexedNestedHashMap;
|
||||||
import org.broadinstitute.sting.utils.recalibration.QualQuantizer;
|
import org.broadinstitute.sting.utils.recalibration.QualQuantizer;
|
||||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||||
|
|
||||||
|
|
@ -36,10 +36,10 @@ public class QuantizationInfo {
|
||||||
for (int i = 0; i < qualHistogram.length; i++)
|
for (int i = 0; i < qualHistogram.length; i++)
|
||||||
qualHistogram[i] = 0L;
|
qualHistogram[i] = 0L;
|
||||||
|
|
||||||
final NestedHashMap qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); // get the quality score table
|
final IntegerIndexedNestedHashMap<RecalDatum> qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); // get the quality score table
|
||||||
|
|
||||||
for (final Object value : qualTable.getAllValues()) {
|
for (final RecalDatum value : qualTable.getAllValues()) {
|
||||||
final RecalDatum datum = (RecalDatum)value;
|
final RecalDatum datum = value;
|
||||||
final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL )
|
final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL )
|
||||||
qualHistogram[empiricalQual] += datum.numObservations; // add the number of observations for every key
|
qualHistogram[empiricalQual] += datum.numObservations; // add the number of observations for every key
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -83,6 +83,11 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
||||||
return readGroupLookupTable.get(readGroupId);
|
return readGroupLookupTable.get(readGroupId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int maximumKeyValue() {
|
||||||
|
return readGroupLookupTable.size() - 1;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If the sample has a PU tag annotation, return that. If not, return the read group id.
|
* If the sample has a PU tag annotation, return that. If not, return the read group id.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.R.RScriptExecutor;
|
import org.broadinstitute.sting.utils.R.RScriptExecutor;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
|
import org.broadinstitute.sting.utils.collections.IntegerIndexedNestedHashMap;
|
||||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||||
|
|
@ -211,19 +212,19 @@ public class RecalDataManager {
|
||||||
|
|
||||||
private static List<GATKReportTable> generateReportTables(final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) {
|
private static List<GATKReportTable> generateReportTables(final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) {
|
||||||
List<GATKReportTable> result = new LinkedList<GATKReportTable>();
|
List<GATKReportTable> result = new LinkedList<GATKReportTable>();
|
||||||
int tableIndex = 0;
|
int reportTableIndex = 0;
|
||||||
|
|
||||||
final Map<Covariate, String> covariateNameMap = new HashMap<Covariate, String>(requestedCovariates.length);
|
final Map<Covariate, String> covariateNameMap = new HashMap<Covariate, String>(requestedCovariates.length);
|
||||||
for (final Covariate covariate : requestedCovariates)
|
for (final Covariate covariate : requestedCovariates)
|
||||||
covariateNameMap.put(covariate, parseCovariateName(covariate));
|
covariateNameMap.put(covariate, parseCovariateName(covariate));
|
||||||
|
|
||||||
for (final RecalibrationTables.TableType type : RecalibrationTables.TableType.values()) {
|
for (int tableIndex = 0; tableIndex < recalibrationTables.numTables(); tableIndex++) {
|
||||||
|
|
||||||
final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
|
final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
|
||||||
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[0]), "%s")); // save the required covariate name so we can reference it in the future
|
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[0]), "%s")); // save the required covariate name so we can reference it in the future
|
||||||
if (type != RecalibrationTables.TableType.READ_GROUP_TABLE) {
|
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.index) {
|
||||||
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[1]), "%s")); // save the required covariate name so we can reference it in the future
|
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[1]), "%s")); // save the required covariate name so we can reference it in the future
|
||||||
if (type == RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLE) {
|
if (tableIndex == RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
|
||||||
columnNames.add(covariateValue);
|
columnNames.add(covariateValue);
|
||||||
columnNames.add(covariateName);
|
columnNames.add(covariateName);
|
||||||
}
|
}
|
||||||
|
|
@ -231,41 +232,40 @@ public class RecalDataManager {
|
||||||
|
|
||||||
columnNames.add(eventType); // the order of these column names is important here
|
columnNames.add(eventType); // the order of these column names is important here
|
||||||
columnNames.add(empiricalQuality);
|
columnNames.add(empiricalQuality);
|
||||||
if (type == RecalibrationTables.TableType.READ_GROUP_TABLE)
|
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
|
||||||
columnNames.add(estimatedQReported); // only the read group table needs the estimated Q reported
|
columnNames.add(estimatedQReported); // only the read group table needs the estimated Q reported
|
||||||
columnNames.add(nObservations);
|
columnNames.add(nObservations);
|
||||||
columnNames.add(nErrors);
|
columnNames.add(nErrors);
|
||||||
|
|
||||||
final GATKReportTable reportTable = new GATKReportTable("RecalTable" + tableIndex++, "", columnNames.size());
|
final GATKReportTable reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size());
|
||||||
for (final Pair<String, String> columnName : columnNames)
|
for (final Pair<String, String> columnName : columnNames)
|
||||||
reportTable.addColumn(columnName.getFirst(), columnName.getSecond()); // every table must have the event type
|
reportTable.addColumn(columnName.getFirst(), columnName.getSecond()); // every table must have the event type
|
||||||
|
|
||||||
int rowIndex = 0;
|
int rowIndex = 0;
|
||||||
|
|
||||||
final NestedHashMap table = recalibrationTables.getTable(type);
|
final IntegerIndexedNestedHashMap<RecalDatum> table = recalibrationTables.getTable(tableIndex);
|
||||||
for (final NestedHashMap.Leaf row : table.getAllLeaves()) {
|
for (final IntegerIndexedNestedHashMap.Leaf row : table.getAllLeaves()) {
|
||||||
final RecalDatum datum = (RecalDatum)row.value;
|
final RecalDatum datum = (RecalDatum)row.value;
|
||||||
final List<Object> keys = row.keys;
|
final int[] keys = row.keys;
|
||||||
|
|
||||||
int columnIndex = 0;
|
int columnIndex = 0;
|
||||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex).getFirst(), requestedCovariates[0].formatKey((Integer)keys.get(columnIndex++)));
|
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex).getFirst(), requestedCovariates[0].formatKey(keys[columnIndex++]));
|
||||||
if (type != RecalibrationTables.TableType.READ_GROUP_TABLE) {
|
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.index) {
|
||||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex).getFirst(), requestedCovariates[1].formatKey((Integer) keys.get(columnIndex++)));
|
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex).getFirst(), requestedCovariates[1].formatKey(keys[columnIndex++]));
|
||||||
if (type == RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLE) {
|
if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
|
||||||
final int covariateIndex = (Integer)keys.get(columnIndex);
|
final Covariate covariate = requestedCovariates[tableIndex];
|
||||||
final Covariate covariate = requestedCovariates[2 + covariateIndex];
|
final int covariateKey = keys[columnIndex+1];
|
||||||
final int covariateKey = (Integer)keys.get(columnIndex+1);
|
|
||||||
|
|
||||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), covariate.formatKey(covariateKey));
|
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), covariate.formatKey(covariateKey));
|
||||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), covariateNameMap.get(covariate));
|
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), covariateNameMap.get(covariate));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
final EventType event = EventType.eventFrom((Integer)keys.get(columnIndex));
|
final EventType event = EventType.eventFrom(keys[columnIndex]);
|
||||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), event);
|
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), event);
|
||||||
|
|
||||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality());
|
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality());
|
||||||
if (type == RecalibrationTables.TableType.READ_GROUP_TABLE)
|
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
|
||||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
|
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
|
||||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), datum.numObservations);
|
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex++).getFirst(), datum.numObservations);
|
||||||
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex).getFirst(), datum.numMismatches);
|
setReportTableCell(reportTable, rowIndex, columnNames.get(columnIndex).getFirst(), datum.numMismatches);
|
||||||
|
|
@ -344,25 +344,31 @@ public class RecalDataManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void writeCSV(final PrintStream deltaTableFile, final RecalibrationTables recalibrationTables, final String recalibrationMode, final Covariate[] requestedCovariates, final boolean printHeader) {
|
private static void writeCSV(final PrintStream deltaTableFile, final RecalibrationTables recalibrationTables, final String recalibrationMode, final Covariate[] requestedCovariates, final boolean printHeader) {
|
||||||
|
|
||||||
final NestedHashMap deltaTable = new NestedHashMap();
|
final NestedHashMap deltaTable = new NestedHashMap();
|
||||||
|
|
||||||
// add the quality score table to the delta table
|
// add the quality score table to the delta table
|
||||||
final NestedHashMap qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
|
final IntegerIndexedNestedHashMap<RecalDatum> qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
|
||||||
for (final NestedHashMap.Leaf leaf : qualTable.getAllLeaves()) { // go through every element in the covariates table to create the delta table
|
for (final IntegerIndexedNestedHashMap.Leaf leaf : qualTable.getAllLeaves()) { // go through every element in the covariates table to create the delta table
|
||||||
final List<Object> newCovs = new ArrayList<Object>(4);
|
final int[] newCovs = new int[4];
|
||||||
newCovs.add(leaf.keys.get(0));
|
newCovs[0] = leaf.keys[0];
|
||||||
newCovs.add(requestedCovariates.length); // replace the covariate name with an arbitrary (unused) index for QualityScore
|
newCovs[1] = requestedCovariates.length; // replace the covariate name with an arbitrary (unused) index for QualityScore
|
||||||
newCovs.add(leaf.keys.get(1));
|
newCovs[2] = leaf.keys[1];
|
||||||
newCovs.add(leaf.keys.get(2));
|
newCovs[3] = leaf.keys[2];
|
||||||
addToDeltaTable(deltaTable, newCovs.toArray(), (RecalDatum)leaf.value); // add this covariate to the delta table
|
addToDeltaTable(deltaTable, newCovs, (RecalDatum)leaf.value); // add this covariate to the delta table
|
||||||
}
|
}
|
||||||
|
|
||||||
// add the optional covariates to the delta table
|
// add the optional covariates to the delta table
|
||||||
final NestedHashMap covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLE);
|
for (int i = RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index; i < requestedCovariates.length; i++) {
|
||||||
for (final NestedHashMap.Leaf leaf : covTable.getAllLeaves()) {
|
final IntegerIndexedNestedHashMap<RecalDatum> covTable = recalibrationTables.getTable(i);
|
||||||
final List<Object> covs = new ArrayList<Object>(leaf.keys);
|
for (final IntegerIndexedNestedHashMap.Leaf leaf : covTable.getAllLeaves()) {
|
||||||
covs.remove(1); // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS)
|
final int[] covs = new int[leaf.keys.length-1];
|
||||||
addToDeltaTable(deltaTable, covs.toArray(), (RecalDatum)leaf.value); // add this covariate to the delta table
|
covs[0] = leaf.keys[0];
|
||||||
|
covs[1] = i; // reset the quality score covariate to 0 from the keyset (so we aggregate all rows regardless of QS)
|
||||||
|
covs[2] = leaf.keys[2];
|
||||||
|
covs[3] = leaf.keys[3];
|
||||||
|
addToDeltaTable(deltaTable, covs, (RecalDatum) leaf.value); // add this covariate to the delta table
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// output the csv file
|
// output the csv file
|
||||||
|
|
@ -397,12 +403,9 @@ public class RecalDataManager {
|
||||||
|
|
||||||
private static List<Object> generateValuesFromKeys(final List<Object> keys, final Covariate[] covariates, final Map<Covariate, String> covariateNameMap) {
|
private static List<Object> generateValuesFromKeys(final List<Object> keys, final Covariate[] covariates, final Map<Covariate, String> covariateNameMap) {
|
||||||
final List<Object> values = new ArrayList<Object>(4);
|
final List<Object> values = new ArrayList<Object>(4);
|
||||||
values.add(covariates[0].formatKey((Integer)keys.get(0)));
|
values.add(covariates[RecalibrationTables.TableType.READ_GROUP_TABLE.index].formatKey((Integer)keys.get(0)));
|
||||||
|
|
||||||
// TODO -- create static final variables to hold the indexes of the RG, qual, cov ID, etc.
|
|
||||||
|
|
||||||
final int covariateIndex = (Integer)keys.get(1);
|
final int covariateIndex = (Integer)keys.get(1);
|
||||||
final Covariate covariate = covariateIndex == covariates.length ? covariates[1] : covariates[2 + covariateIndex];
|
final Covariate covariate = covariateIndex == covariates.length ? covariates[RecalibrationTables.TableType.QUALITY_SCORE_TABLE.index] : covariates[covariateIndex];
|
||||||
final int covariateKey = (Integer)keys.get(2);
|
final int covariateKey = (Integer)keys.get(2);
|
||||||
values.add(covariate.formatKey(covariateKey));
|
values.add(covariate.formatKey(covariateKey));
|
||||||
values.add(covariateNameMap.get(covariate));
|
values.add(covariateNameMap.get(covariate));
|
||||||
|
|
@ -422,14 +425,22 @@ public class RecalDataManager {
|
||||||
* @param deltaKey the key to the table
|
* @param deltaKey the key to the table
|
||||||
* @param recalDatum the recal datum to combine with the accuracyDatum element in the table
|
* @param recalDatum the recal datum to combine with the accuracyDatum element in the table
|
||||||
*/
|
*/
|
||||||
private static void addToDeltaTable(final NestedHashMap deltaTable, final Object[] deltaKey, final RecalDatum recalDatum) {
|
private static void addToDeltaTable(final NestedHashMap deltaTable, final int[] deltaKey, final RecalDatum recalDatum) {
|
||||||
final RecalDatum deltaDatum = (RecalDatum)deltaTable.get(deltaKey); // check if we already have a RecalDatum for this key
|
Object[] wrappedKey = wrapKeys(deltaKey);
|
||||||
|
final RecalDatum deltaDatum = (RecalDatum)deltaTable.get(wrappedKey); // check if we already have a RecalDatum for this key
|
||||||
if (deltaDatum == null)
|
if (deltaDatum == null)
|
||||||
deltaTable.put(new RecalDatum(recalDatum), deltaKey); // if we don't have a key yet, create a new one with the same values as the curent datum
|
deltaTable.put(new RecalDatum(recalDatum), wrappedKey); // if we don't have a key yet, create a new one with the same values as the curent datum
|
||||||
else
|
else
|
||||||
deltaDatum.combine(recalDatum); // if we do have a datum, combine it with this one.
|
deltaDatum.combine(recalDatum); // if we do have a datum, combine it with this one.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static Object[] wrapKeys(final int[] keys) {
|
||||||
|
final Object[] wrappedKeys = new Object[keys.length];
|
||||||
|
for (int i = 0; i < keys.length; i++)
|
||||||
|
wrappedKeys[i] = keys[i];
|
||||||
|
return wrappedKeys;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Section of code shared between the two recalibration walkers which uses the command line arguments to adjust attributes of the read such as quals or platform string
|
* Section of code shared between the two recalibration walkers which uses the command line arguments to adjust attributes of the read such as quals or platform string
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
import org.broadinstitute.sting.utils.collections.IntegerIndexedNestedHashMap;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||||
|
|
||||||
|
|
@ -26,9 +26,9 @@ public class RecalibrationReport {
|
||||||
private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
|
private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
|
||||||
private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter
|
private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter
|
||||||
|
|
||||||
private final Object[] tempRGarray = new Object[2];
|
private final int[] tempRGarray = new int[2];
|
||||||
private final Object[] tempQUALarray = new Object[3];
|
private final int[] tempQUALarray = new int[3];
|
||||||
private final Object[] tempCOVarray = new Object[5];
|
private final int[] tempCOVarray = new int[4];
|
||||||
|
|
||||||
public RecalibrationReport(final File RECAL_FILE) {
|
public RecalibrationReport(final File RECAL_FILE) {
|
||||||
final GATKReport report = new GATKReport(RECAL_FILE);
|
final GATKReport report = new GATKReport(RECAL_FILE);
|
||||||
|
|
@ -57,16 +57,15 @@ public class RecalibrationReport {
|
||||||
for (Covariate cov : requestedCovariates)
|
for (Covariate cov : requestedCovariates)
|
||||||
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
|
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
|
||||||
|
|
||||||
final GATKReportTable rgReportTable = report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE);
|
// TODO -- note that we might be able to save memory (esp. for sparse tables) by making one pass through the GATK report to see the maximum values for each covariate and using that in the constructor here
|
||||||
final NestedHashMap rgTable = parseReadGroupTable(rgReportTable);
|
recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE)));
|
||||||
|
|
||||||
final GATKReportTable qualReportTable = report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE);
|
parseReadGroupTable(report.getTable(RecalDataManager.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE));
|
||||||
final NestedHashMap qualTable = parseQualityScoreTable(qualReportTable);
|
|
||||||
|
|
||||||
final GATKReportTable covReportTable = report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE);
|
parseQualityScoreTable(report.getTable(RecalDataManager.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE));
|
||||||
final NestedHashMap covTable = parseAllCovariatesTable(covReportTable);
|
|
||||||
|
parseAllCovariatesTable(report.getTable(RecalDataManager.ALL_COVARIATES_REPORT_TABLE_TITLE), recalibrationTables);
|
||||||
|
|
||||||
recalibrationTables = new RecalibrationTables(rgTable, qualTable, covTable);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected RecalibrationReport(final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final GATKReportTable argumentTable, final RecalibrationArgumentCollection RAC) {
|
protected RecalibrationReport(final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final GATKReportTable argumentTable, final RecalibrationArgumentCollection RAC) {
|
||||||
|
|
@ -78,6 +77,19 @@ public class RecalibrationReport {
|
||||||
this.optionalCovariateIndexes = null;
|
this.optionalCovariateIndexes = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Counts the number of unique read groups in the table
|
||||||
|
*
|
||||||
|
* @param reportTable the GATKReport table containing data for this table
|
||||||
|
* @return the number of unique read groups
|
||||||
|
*/
|
||||||
|
private int countReadGroups(final GATKReportTable reportTable) {
|
||||||
|
Set<String> readGroups = new HashSet<String>();
|
||||||
|
for ( int i = 0; i < reportTable.getNumRows(); i++ )
|
||||||
|
readGroups.add(reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME).toString());
|
||||||
|
return readGroups.size();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Combines two recalibration reports by adding all observations and errors
|
* Combines two recalibration reports by adding all observations and errors
|
||||||
*
|
*
|
||||||
|
|
@ -94,14 +106,14 @@ public class RecalibrationReport {
|
||||||
public void combine(final RecalibrationReport other) {
|
public void combine(final RecalibrationReport other) {
|
||||||
|
|
||||||
for (RecalibrationTables.TableType type : RecalibrationTables.TableType.values()) {
|
for (RecalibrationTables.TableType type : RecalibrationTables.TableType.values()) {
|
||||||
final NestedHashMap myTable = recalibrationTables.getTable(type);
|
final IntegerIndexedNestedHashMap<RecalDatum> myTable = recalibrationTables.getTable(type);
|
||||||
final NestedHashMap otherTable = other.recalibrationTables.getTable(type);
|
final IntegerIndexedNestedHashMap<RecalDatum> otherTable = other.recalibrationTables.getTable(type);
|
||||||
|
|
||||||
for (final NestedHashMap.Leaf row : otherTable.getAllLeaves()) {
|
for (final IntegerIndexedNestedHashMap.Leaf row : otherTable.getAllLeaves()) {
|
||||||
final RecalDatum myDatum = (RecalDatum)myTable.get(row.keys);
|
final RecalDatum myDatum = myTable.get(row.keys);
|
||||||
|
|
||||||
if (myDatum == null)
|
if (myDatum == null)
|
||||||
myTable.put(row.value, row.keys);
|
myTable.put((RecalDatum)row.value, row.keys);
|
||||||
else
|
else
|
||||||
myDatum.combine((RecalDatum)row.value);
|
myDatum.combine((RecalDatum)row.value);
|
||||||
}
|
}
|
||||||
|
|
@ -124,39 +136,34 @@ public class RecalibrationReport {
|
||||||
* Compiles the list of keys for the Covariates table and uses the shared parsing utility to produce the actual table
|
* Compiles the list of keys for the Covariates table and uses the shared parsing utility to produce the actual table
|
||||||
*
|
*
|
||||||
* @param reportTable the GATKReport table containing data for this table
|
* @param reportTable the GATKReport table containing data for this table
|
||||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
* @param recalibrationTables the recalibration tables
|
||||||
*/
|
\ */
|
||||||
private NestedHashMap parseAllCovariatesTable(final GATKReportTable reportTable) {
|
private void parseAllCovariatesTable(final GATKReportTable reportTable, final RecalibrationTables recalibrationTables) {
|
||||||
final NestedHashMap result = new NestedHashMap();
|
|
||||||
|
|
||||||
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
|
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
|
||||||
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
|
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
|
||||||
tempCOVarray[0] = requestedCovariates[0].keyFromValue(rg);
|
tempCOVarray[0] = requestedCovariates[0].keyFromValue(rg);
|
||||||
final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
|
final Object qual = reportTable.get(i, RecalDataManager.QUALITY_SCORE_COLUMN_NAME);
|
||||||
tempCOVarray[1] = requestedCovariates[1].keyFromValue(qual);
|
tempCOVarray[1] = requestedCovariates[1].keyFromValue(qual);
|
||||||
|
|
||||||
final String covName = (String)reportTable.get(i, RecalDataManager.COVARIATE_NAME_COLUMN_NAME);
|
final String covName = (String)reportTable.get(i, RecalDataManager.COVARIATE_NAME_COLUMN_NAME);
|
||||||
final int covIndex = optionalCovariateIndexes.get(covName);
|
final int covIndex = optionalCovariateIndexes.get(covName);
|
||||||
tempCOVarray[2] = covIndex;
|
|
||||||
final Object covValue = reportTable.get(i, RecalDataManager.COVARIATE_VALUE_COLUMN_NAME);
|
final Object covValue = reportTable.get(i, RecalDataManager.COVARIATE_VALUE_COLUMN_NAME);
|
||||||
tempCOVarray[3] = requestedCovariates[covIndex + 2].keyFromValue(covValue);
|
tempCOVarray[2] = requestedCovariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex].keyFromValue(covValue);
|
||||||
|
|
||||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
|
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
|
||||||
tempCOVarray[4] = event.index;
|
tempCOVarray[3] = event.index;
|
||||||
|
|
||||||
result.put(getRecalDatum(reportTable, i, false), tempCOVarray);
|
recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex).put(getRecalDatum(reportTable, i, false), tempCOVarray);
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* Compiles the list of keys for the QualityScore table and uses the shared parsing utility to produce the actual table
|
* Compiles the list of keys for the QualityScore table and uses the shared parsing utility to produce the actual table
|
||||||
* @param reportTable the GATKReport table containing data for this table
|
* @param reportTable the GATKReport table containing data for this table
|
||||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
* @param qualTable the map representing this table
|
||||||
*/
|
*/
|
||||||
private NestedHashMap parseQualityScoreTable(final GATKReportTable reportTable) {
|
private void parseQualityScoreTable(final GATKReportTable reportTable, final IntegerIndexedNestedHashMap<RecalDatum> qualTable) {
|
||||||
final NestedHashMap result = new NestedHashMap();
|
|
||||||
|
|
||||||
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
|
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
|
||||||
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
|
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
|
||||||
tempQUALarray[0] = requestedCovariates[0].keyFromValue(rg);
|
tempQUALarray[0] = requestedCovariates[0].keyFromValue(rg);
|
||||||
|
|
@ -165,31 +172,25 @@ public class RecalibrationReport {
|
||||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
|
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
|
||||||
tempQUALarray[2] = event.index;
|
tempQUALarray[2] = event.index;
|
||||||
|
|
||||||
result.put(getRecalDatum(reportTable, i, false), tempQUALarray);
|
qualTable.put(getRecalDatum(reportTable, i, false), tempQUALarray);
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compiles the list of keys for the ReadGroup table and uses the shared parsing utility to produce the actual table
|
* Compiles the list of keys for the ReadGroup table and uses the shared parsing utility to produce the actual table
|
||||||
*
|
*
|
||||||
* @param reportTable the GATKReport table containing data for this table
|
* @param reportTable the GATKReport table containing data for this table
|
||||||
* @return a lookup table indexed by bitsets containing the empirical quality and estimated quality reported for every key.
|
* @param rgTable the map representing this table
|
||||||
*/
|
*/
|
||||||
private NestedHashMap parseReadGroupTable(final GATKReportTable reportTable) {
|
private void parseReadGroupTable(final GATKReportTable reportTable, final IntegerIndexedNestedHashMap<RecalDatum> rgTable) {
|
||||||
final NestedHashMap result = new NestedHashMap();
|
|
||||||
|
|
||||||
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
|
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
|
||||||
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
|
final Object rg = reportTable.get(i, RecalDataManager.READGROUP_COLUMN_NAME);
|
||||||
tempRGarray[0] = requestedCovariates[0].keyFromValue(rg);
|
tempRGarray[0] = requestedCovariates[0].keyFromValue(rg);
|
||||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
|
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalDataManager.EVENT_TYPE_COLUMN_NAME));
|
||||||
tempRGarray[1] = event.index;
|
tempRGarray[1] = event.index;
|
||||||
|
|
||||||
result.put(getRecalDatum(reportTable, i, true), tempRGarray);
|
rgTable.put(getRecalDatum(reportTable, i, true), tempRGarray);
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private RecalDatum getRecalDatum(final GATKReportTable reportTable, final int row, final boolean hasEstimatedQReportedColumn) {
|
private RecalDatum getRecalDatum(final GATKReportTable reportTable, final int row, final boolean hasEstimatedQReportedColumn) {
|
||||||
|
|
@ -298,7 +299,7 @@ public class RecalibrationReport {
|
||||||
*/
|
*/
|
||||||
public void calculateEmpiricalAndQuantizedQualities() {
|
public void calculateEmpiricalAndQuantizedQualities() {
|
||||||
for (RecalibrationTables.TableType type : RecalibrationTables.TableType.values()) {
|
for (RecalibrationTables.TableType type : RecalibrationTables.TableType.values()) {
|
||||||
final NestedHashMap table = recalibrationTables.getTable(type);
|
final IntegerIndexedNestedHashMap table = recalibrationTables.getTable(type);
|
||||||
for (final Object value : table.getAllValues()) {
|
for (final Object value : table.getAllValues()) {
|
||||||
((RecalDatum)value).calcCombinedEmpiricalQuality();
|
((RecalDatum)value).calcCombinedEmpiricalQuality();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,117 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.collections;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: ebanks
|
||||||
|
* Date: July 1, 2012
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class IntegerIndexedNestedHashMap<T> {
|
||||||
|
|
||||||
|
protected final Object[] data;
|
||||||
|
|
||||||
|
protected final int numDimensions;
|
||||||
|
protected final int[] indexFactors;
|
||||||
|
|
||||||
|
public IntegerIndexedNestedHashMap(final int... dimensions) {
|
||||||
|
numDimensions = dimensions.length;
|
||||||
|
if ( numDimensions == 0 )
|
||||||
|
throw new ReviewedStingException("There must be at least one dimension to an IntegerIndexedNestedHashMap");
|
||||||
|
|
||||||
|
indexFactors = new int[numDimensions];
|
||||||
|
indexFactors[0] = 1;
|
||||||
|
int totalSize = dimensions[0];
|
||||||
|
for ( int i = 1; i < numDimensions; i++ ) {
|
||||||
|
indexFactors[i] = indexFactors[i-1] * dimensions[i-1];
|
||||||
|
totalSize *= dimensions[i];
|
||||||
|
}
|
||||||
|
data = new Object[totalSize];
|
||||||
|
}
|
||||||
|
|
||||||
|
public T get(final int... keys) {
|
||||||
|
return (T)data[calculateIndex(keys)];
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized void put(final T value, final int... keys) { // WARNING! value comes before the keys!
|
||||||
|
data[calculateIndex(keys)] = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected int calculateIndex(final int... keys) {
|
||||||
|
int index = keys[0];
|
||||||
|
for ( int i = 1; i < numDimensions; i++ )
|
||||||
|
index += keys[i] * indexFactors[i];
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<T> getAllValues() {
|
||||||
|
final int dataLength = data.length;
|
||||||
|
final List<T> result = new ArrayList<T>(dataLength);
|
||||||
|
for ( int i = 0; i < dataLength; i++ ) {
|
||||||
|
final Object value = data[i];
|
||||||
|
if ( value != null )
|
||||||
|
result.add((T)value);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class Leaf {
|
||||||
|
public final int[] keys;
|
||||||
|
public final Object value;
|
||||||
|
|
||||||
|
public Leaf(final int[] keys, final Object value) {
|
||||||
|
this.keys = keys;
|
||||||
|
this.value = value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Leaf> getAllLeaves() {
|
||||||
|
final int dataLength = data.length;
|
||||||
|
final List<Leaf> result = new ArrayList<Leaf>(dataLength);
|
||||||
|
for ( int i = 0; i < dataLength; i++ ) {
|
||||||
|
final Object value = data[i];
|
||||||
|
if ( value != null )
|
||||||
|
result.add(new Leaf(reverseEngineerIndex(i), value));
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int[] reverseEngineerIndex(int index) {
|
||||||
|
final int[] keys = new int[numDimensions];
|
||||||
|
for ( int i = numDimensions - 1; i >= 0; i-- ) {
|
||||||
|
final int key = index / indexFactors[i];
|
||||||
|
index -= (key * indexFactors[i]);
|
||||||
|
keys[i] = key;
|
||||||
|
}
|
||||||
|
return keys;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -42,17 +42,12 @@ public class NestedHashMap {
|
||||||
|
|
||||||
public Object get( final Object... keys ) {
|
public Object get( final Object... keys ) {
|
||||||
Map map = this.data;
|
Map map = this.data;
|
||||||
final int keysLength = keys.length;
|
final int nestedMaps = keys.length - 1;
|
||||||
for( int iii = 0; iii < keysLength; iii++ ) {
|
for( int iii = 0; iii < nestedMaps; iii++ ) {
|
||||||
if( iii == keysLength - 1 ) {
|
map = (Map) map.get(keys[iii]);
|
||||||
return map.get(keys[iii]);
|
if( map == null ) { return null; }
|
||||||
} else {
|
|
||||||
map = (Map) map.get(keys[iii]);
|
|
||||||
if( map == null ) { return null; }
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
return map.get(keys[nestedMaps]);
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized void put( final Object value, final Object... keys ) { // WARNING! value comes before the keys!
|
public synchronized void put( final Object value, final Object... keys ) { // WARNING! value comes before the keys!
|
||||||
|
|
@ -87,7 +82,7 @@ public class NestedHashMap {
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Object> getAllValues() {
|
public List<Object> getAllValues() {
|
||||||
List<Object> result = new ArrayList<Object>();
|
final List<Object> result = new ArrayList<Object>();
|
||||||
fillAllValues(data, result);
|
fillAllValues(data, result);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
@ -114,8 +109,8 @@ public class NestedHashMap {
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Leaf> getAllLeaves() {
|
public List<Leaf> getAllLeaves() {
|
||||||
List<Leaf> result = new ArrayList<Leaf>();
|
final List<Leaf> result = new ArrayList<Leaf>();
|
||||||
List<Object> path = new ArrayList<Object>();
|
final List<Object> path = new ArrayList<Object>();
|
||||||
fillAllLeaves(data, path, result);
|
fillAllLeaves(data, path, result);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.recalibration;
|
||||||
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
|
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
import org.broadinstitute.sting.utils.collections.IntegerIndexedNestedHashMap;
|
||||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
|
|
@ -48,8 +49,6 @@ public class BaseRecalibration {
|
||||||
private final RecalibrationTables recalibrationTables;
|
private final RecalibrationTables recalibrationTables;
|
||||||
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
||||||
|
|
||||||
private final Object[] tempKeySet;
|
|
||||||
|
|
||||||
private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
||||||
static {
|
static {
|
||||||
for (int i = 0; i < EventType.values().length; i++)
|
for (int i = 0; i < EventType.values().length; i++)
|
||||||
|
|
@ -74,7 +73,6 @@ public class BaseRecalibration {
|
||||||
quantizationInfo.quantizeQualityScores(quantizationLevels);
|
quantizationInfo.quantizeQualityScores(quantizationLevels);
|
||||||
|
|
||||||
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||||
tempKeySet = new Integer[requestedCovariates.length];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -89,7 +87,6 @@ public class BaseRecalibration {
|
||||||
this.recalibrationTables = recalibrationTables;
|
this.recalibrationTables = recalibrationTables;
|
||||||
this.requestedCovariates = requestedCovariates;
|
this.requestedCovariates = requestedCovariates;
|
||||||
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||||
tempKeySet = new Integer[requestedCovariates.length];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -112,13 +109,13 @@ public class BaseRecalibration {
|
||||||
|
|
||||||
if (originalQualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
if (originalQualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
||||||
final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model
|
final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model
|
||||||
//final Object[] wrappedKeySet = wrapKeySet(keySet);
|
final Byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
|
||||||
//Byte recalibratedQualityScore = (Byte) qualityScoreByFullCovariateKey[errorModel.index].get(wrappedKeySet);
|
//Byte recalibratedQualityScore = (Byte) qualityScoreByFullCovariateKey[errorModel.index].get(keySet);
|
||||||
Byte recalibratedQualityScore = null;
|
//Byte recalibratedQualityScore = null;
|
||||||
if (recalibratedQualityScore == null) {
|
//if (recalibratedQualityScore == null) {
|
||||||
recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
|
// recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
|
||||||
//qualityScoreByFullCovariateKey[errorModel.index].put(recalibratedQualityScore, wrappedKeySet);
|
// qualityScoreByFullCovariateKey[errorModel.index].put(recalibratedQualityScore, keySet);
|
||||||
}
|
//}
|
||||||
quals[offset] = recalibratedQualityScore;
|
quals[offset] = recalibratedQualityScore;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -126,12 +123,6 @@ public class BaseRecalibration {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Object[] wrapKeySet(final int[] keySet) {
|
|
||||||
for (int i = 0; i < keySet.length; i++)
|
|
||||||
tempKeySet[i] = keySet[i];
|
|
||||||
return tempKeySet;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Implements a serial recalibration of the reads using the combinational table.
|
* Implements a serial recalibration of the reads using the combinational table.
|
||||||
* First, we perform a positional recalibration, and then a subsequent dinuc correction.
|
* First, we perform a positional recalibration, and then a subsequent dinuc correction.
|
||||||
|
|
@ -154,7 +145,7 @@ public class BaseRecalibration {
|
||||||
final byte qualFromRead = (byte)(long)key[1];
|
final byte qualFromRead = (byte)(long)key[1];
|
||||||
final double globalDeltaQ = calculateGlobalDeltaQ(recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE), key, errorModel);
|
final double globalDeltaQ = calculateGlobalDeltaQ(recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE), key, errorModel);
|
||||||
final double deltaQReported = calculateDeltaQReported(recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE), key, errorModel, globalDeltaQ, qualFromRead);
|
final double deltaQReported = calculateDeltaQReported(recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE), key, errorModel, globalDeltaQ, qualFromRead);
|
||||||
final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLE), key, errorModel, globalDeltaQ, deltaQReported, qualFromRead);
|
final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables, key, errorModel, globalDeltaQ, deltaQReported, qualFromRead);
|
||||||
|
|
||||||
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
|
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
|
||||||
recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
|
recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL
|
||||||
|
|
@ -162,10 +153,10 @@ public class BaseRecalibration {
|
||||||
return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
|
return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality
|
||||||
}
|
}
|
||||||
|
|
||||||
private double calculateGlobalDeltaQ(final NestedHashMap table, final int[] key, final EventType errorModel) {
|
private double calculateGlobalDeltaQ(final IntegerIndexedNestedHashMap<RecalDatum> table, final int[] key, final EventType errorModel) {
|
||||||
double result = 0.0;
|
double result = 0.0;
|
||||||
|
|
||||||
final RecalDatum empiricalQualRG = (RecalDatum)table.get(key[0], errorModel.index);
|
final RecalDatum empiricalQualRG = table.get(key[0], errorModel.index);
|
||||||
if (empiricalQualRG != null) {
|
if (empiricalQualRG != null) {
|
||||||
final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality();
|
final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality();
|
||||||
final double aggregrateQReported = empiricalQualRG.getEstimatedQReported();
|
final double aggregrateQReported = empiricalQualRG.getEstimatedQReported();
|
||||||
|
|
@ -175,10 +166,10 @@ public class BaseRecalibration {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
private double calculateDeltaQReported(final NestedHashMap table, final int[] key, final EventType errorModel, final double globalDeltaQ, final byte qualFromRead) {
|
private double calculateDeltaQReported(final IntegerIndexedNestedHashMap<RecalDatum> table, final int[] key, final EventType errorModel, final double globalDeltaQ, final byte qualFromRead) {
|
||||||
double result = 0.0;
|
double result = 0.0;
|
||||||
|
|
||||||
final RecalDatum empiricalQualQS = (RecalDatum)table.get(key[0], key[1], errorModel.index);
|
final RecalDatum empiricalQualQS = table.get(key[0], key[1], errorModel.index);
|
||||||
if (empiricalQualQS != null) {
|
if (empiricalQualQS != null) {
|
||||||
final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality();
|
final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality();
|
||||||
result = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
|
result = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
|
||||||
|
|
@ -187,14 +178,15 @@ public class BaseRecalibration {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
private double calculateDeltaQCovariates(final NestedHashMap table, final int[] key, final EventType errorModel, final double globalDeltaQ, final double deltaQReported, final byte qualFromRead) {
|
private double calculateDeltaQCovariates(final RecalibrationTables recalibrationTables, final int[] key, final EventType errorModel, final double globalDeltaQ, final double deltaQReported, final byte qualFromRead) {
|
||||||
double result = 0.0;
|
double result = 0.0;
|
||||||
|
|
||||||
// for all optional covariates
|
// for all optional covariates
|
||||||
for (int i = 2; i < requestedCovariates.length; i++) {
|
for (int i = 2; i < requestedCovariates.length; i++) {
|
||||||
if (key[i] < 0)
|
if (key[i] < 0)
|
||||||
continue;
|
continue;
|
||||||
final RecalDatum empiricalQualCO = (RecalDatum)table.get(key[0], key[1], (i-2), key[i], errorModel.index);
|
|
||||||
|
final RecalDatum empiricalQualCO = recalibrationTables.getTable(i).get(key[0], key[1], key[i], errorModel.index);
|
||||||
if (empiricalQualCO != null) {
|
if (empiricalQualCO != null) {
|
||||||
final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
|
final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
|
||||||
result += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
|
result += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,10 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.recalibration;
|
package org.broadinstitute.sting.utils.recalibration;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
import org.broadinstitute.sting.gatk.walkers.bqsr.Covariate;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDatum;
|
||||||
|
import org.broadinstitute.sting.utils.collections.IntegerIndexedNestedHashMap;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility class to facilitate on-the-fly base quality score recalibration.
|
* Utility class to facilitate on-the-fly base quality score recalibration.
|
||||||
|
|
@ -39,24 +42,42 @@ public class RecalibrationTables {
|
||||||
public enum TableType {
|
public enum TableType {
|
||||||
READ_GROUP_TABLE(0),
|
READ_GROUP_TABLE(0),
|
||||||
QUALITY_SCORE_TABLE(1),
|
QUALITY_SCORE_TABLE(1),
|
||||||
OPTIONAL_COVARIATE_TABLE(2);
|
OPTIONAL_COVARIATE_TABLES_START(2);
|
||||||
|
|
||||||
private final int index;
|
public final int index;
|
||||||
|
|
||||||
private TableType(final int index) {
|
private TableType(final int index) {
|
||||||
this.index = index;
|
this.index = index;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final NestedHashMap[] tables = new NestedHashMap[TableType.values().length];
|
private final IntegerIndexedNestedHashMap[] tables;
|
||||||
|
|
||||||
public RecalibrationTables(final NestedHashMap rgMap, final NestedHashMap qualMap, final NestedHashMap covMap) {
|
public RecalibrationTables(final Covariate[] covariates) {
|
||||||
tables[TableType.READ_GROUP_TABLE.index] = rgMap;
|
this(covariates, covariates[TableType.READ_GROUP_TABLE.index].maximumKeyValue() + 1);
|
||||||
tables[TableType.QUALITY_SCORE_TABLE.index] = qualMap;
|
|
||||||
tables[TableType.OPTIONAL_COVARIATE_TABLE.index] = covMap;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public NestedHashMap getTable(final TableType type) {
|
public RecalibrationTables(final Covariate[] covariates, final int numReadGroups) {
|
||||||
return tables[type.index];
|
tables = new IntegerIndexedNestedHashMap[covariates.length];
|
||||||
|
|
||||||
|
final int qualDimension = covariates[TableType.QUALITY_SCORE_TABLE.index].maximumKeyValue() + 1;
|
||||||
|
final int eventDimension = EventType.values().length;
|
||||||
|
|
||||||
|
tables[TableType.READ_GROUP_TABLE.index] = new IntegerIndexedNestedHashMap<RecalDatum>(numReadGroups, eventDimension);
|
||||||
|
tables[TableType.QUALITY_SCORE_TABLE.index] = new IntegerIndexedNestedHashMap<RecalDatum>(numReadGroups, qualDimension, eventDimension);
|
||||||
|
for (int i = TableType.OPTIONAL_COVARIATE_TABLES_START.index; i < covariates.length; i++)
|
||||||
|
tables[i] = new IntegerIndexedNestedHashMap<RecalDatum>(numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension);
|
||||||
|
}
|
||||||
|
|
||||||
|
public IntegerIndexedNestedHashMap<RecalDatum> getTable(final TableType type) {
|
||||||
|
return (IntegerIndexedNestedHashMap<RecalDatum>)tables[type.index];
|
||||||
|
}
|
||||||
|
|
||||||
|
public IntegerIndexedNestedHashMap<RecalDatum> getTable(final int index) {
|
||||||
|
return (IntegerIndexedNestedHashMap<RecalDatum>)tables[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
public int numTables() {
|
||||||
|
return tables.length;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
import org.broadinstitute.sting.utils.collections.IntegerIndexedNestedHashMap;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||||
|
|
@ -74,9 +74,9 @@ public class RecalibrationReportUnitTest {
|
||||||
int nKeys = 0; // keep track of how many keys were produced
|
int nKeys = 0; // keep track of how many keys were produced
|
||||||
final ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates);
|
final ReadCovariates rc = RecalDataManager.computeCovariates(read, requestedCovariates);
|
||||||
|
|
||||||
final NestedHashMap rgTable = new NestedHashMap();
|
final RecalibrationTables recalibrationTables = new RecalibrationTables(requestedCovariates);
|
||||||
final NestedHashMap qualTable = new NestedHashMap();
|
final IntegerIndexedNestedHashMap<RecalDatum> rgTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
||||||
final NestedHashMap covTable = new NestedHashMap();
|
final IntegerIndexedNestedHashMap<RecalDatum> qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
|
||||||
|
|
||||||
for (int offset = 0; offset < length; offset++) {
|
for (int offset = 0; offset < length; offset++) {
|
||||||
|
|
||||||
|
|
@ -89,15 +89,14 @@ public class RecalibrationReportUnitTest {
|
||||||
qualTable.put(RecalDatum.createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.index);
|
qualTable.put(RecalDatum.createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.index);
|
||||||
nKeys += 2;
|
nKeys += 2;
|
||||||
for (int j = 0; j < optionalCovariates.size(); j++) {
|
for (int j = 0; j < optionalCovariates.size(); j++) {
|
||||||
covTable.put(RecalDatum.createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], j, covariates[2 + j], errorMode.index);
|
final IntegerIndexedNestedHashMap<RecalDatum> covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j);
|
||||||
|
covTable.put(RecalDatum.createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], j, covariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j], errorMode.index);
|
||||||
nKeys++;
|
nKeys++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Assert.assertEquals(nKeys, expectedKeys);
|
Assert.assertEquals(nKeys, expectedKeys);
|
||||||
|
|
||||||
final RecalibrationTables recalibrationTables = new RecalibrationTables(rgTable, qualTable, covTable);
|
|
||||||
|
|
||||||
final RecalibrationReport report = new RecalibrationReport(quantizationInfo, recalibrationTables, RAC.generateReportTable(), RAC);
|
final RecalibrationReport report = new RecalibrationReport(quantizationInfo, recalibrationTables, RAC.generateReportTable(), RAC);
|
||||||
|
|
||||||
File output = new File("RecalibrationReportUnitTestOutuput.grp");
|
File output = new File("RecalibrationReportUnitTestOutuput.grp");
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ package org.broadinstitute.sting.utils.recalibration;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
|
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
import org.broadinstitute.sting.utils.collections.IntegerIndexedNestedHashMap;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
@ -21,7 +21,6 @@ import java.util.*;
|
||||||
public class BaseRecalibrationUnitTest {
|
public class BaseRecalibrationUnitTest {
|
||||||
|
|
||||||
private org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager dataManager;
|
private org.broadinstitute.sting.gatk.walkers.recalibration.RecalDataManager dataManager;
|
||||||
private RecalibrationTables recalibrationTables;
|
|
||||||
|
|
||||||
private ReadGroupCovariate rgCovariate;
|
private ReadGroupCovariate rgCovariate;
|
||||||
private QualityScoreCovariate qsCovariate;
|
private QualityScoreCovariate qsCovariate;
|
||||||
|
|
@ -74,9 +73,9 @@ public class BaseRecalibrationUnitTest {
|
||||||
|
|
||||||
readCovariates = RecalDataManager.computeCovariates(read, requestedCovariates);
|
readCovariates = RecalDataManager.computeCovariates(read, requestedCovariates);
|
||||||
|
|
||||||
final NestedHashMap rgTable = new NestedHashMap();
|
RecalibrationTables recalibrationTables = new RecalibrationTables(requestedCovariates);
|
||||||
final NestedHashMap qualTable = new NestedHashMap();
|
final IntegerIndexedNestedHashMap<RecalDatum> rgTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
||||||
final NestedHashMap covTable = new NestedHashMap();
|
final IntegerIndexedNestedHashMap<RecalDatum> qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
|
||||||
|
|
||||||
for (int i=0; i<read.getReadLength(); i++) {
|
for (int i=0; i<read.getReadLength(); i++) {
|
||||||
final int[] bitKeys = readCovariates.getMismatchesKeySet(i);
|
final int[] bitKeys = readCovariates.getMismatchesKeySet(i);
|
||||||
|
|
@ -96,12 +95,11 @@ public class BaseRecalibrationUnitTest {
|
||||||
rgTable.put(newDatum, bitKeys[0], EventType.BASE_SUBSTITUTION.index);
|
rgTable.put(newDatum, bitKeys[0], EventType.BASE_SUBSTITUTION.index);
|
||||||
qualTable.put(newDatum, bitKeys[0], bitKeys[1], EventType.BASE_SUBSTITUTION.index);
|
qualTable.put(newDatum, bitKeys[0], bitKeys[1], EventType.BASE_SUBSTITUTION.index);
|
||||||
for (int j = 0; j < optionalCovariates.size(); j++) {
|
for (int j = 0; j < optionalCovariates.size(); j++) {
|
||||||
covTable.put(newDatum, bitKeys[0], bitKeys[1], j, bitKeys[2 + j], EventType.BASE_SUBSTITUTION.index);
|
final IntegerIndexedNestedHashMap<RecalDatum> covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j);
|
||||||
|
covTable.put(newDatum, bitKeys[0], bitKeys[1], j, bitKeys[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j], EventType.BASE_SUBSTITUTION.index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
recalibrationTables = new RecalibrationTables(rgTable, qualTable, covTable);
|
|
||||||
|
|
||||||
dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
dataManager.generateEmpiricalQualities(1, QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||||
|
|
||||||
List<Byte> quantizedQuals = new ArrayList<Byte>();
|
List<Byte> quantizedQuals = new ArrayList<Byte>();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue