BQSR: allow logging recal table updates to a file

For testing/debugging purposes only
This commit is contained in:
David Roazen 2012-10-01 13:54:26 -04:00
parent 2508b0f5a7
commit ac87ed47bb
4 changed files with 103 additions and 5 deletions

View File

@ -179,7 +179,7 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
int numReadGroups = 0;
for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() )
numReadGroups += header.getReadGroups().size();
recalibrationTables = new RecalibrationTables(requestedCovariates, numReadGroups);
recalibrationTables = new RecalibrationTables(requestedCovariates, numReadGroups, RAC.RECAL_TABLE_UPDATE_LOG);
recalibrationEngine = initializeRecalibrationEngine();
recalibrationEngine.initialize(requestedCovariates, recalibrationTables);

View File

@ -182,6 +182,10 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
public String FORCE_PLATFORM = null;
@Hidden
@Output(fullName = "recal_table_update_log", shortName = "recal_table_update_log", required = false, doc = "If provided, log all updates to the recalibration tables to the given file. For debugging/testing purposes only")
public PrintStream RECAL_TABLE_UPDATE_LOG = null;
public File existingRecalibrationReport = null;
public GATKReportTable generateReportTable(final String covariateNames) {

View File

@ -0,0 +1,79 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.collections;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.PrintStream;
/**
* Wrapper around the basic NestedIntegerArray class that logs all updates (i.e., all calls to put())
* to the provided output stream. For testing/debugging purposes.
*
* Log entries are of the following form (fields are tab-separated):
* LABEL VALUE KEY1 KEY2 ... KEY_N
*
* @author David Roazen
*/
public class LoggingNestedIntegerArray<T> extends NestedIntegerArray<T> {

    // Both fields are assigned exactly once, in the constructor. Declaring them final documents
    // that and gives them safe-publication semantics when put() is called from other threads.
    private final PrintStream log;
    private final String logEntryLabel;

    /**
     * Creates a nested integer array that writes one log line for every call to put().
     *
     * @param log output stream to which to log update operations; must not be null
     * @param logEntryLabel String that should be prefixed to each log entry; null is treated as the empty string
     * @param dimensions passed unchanged to the NestedIntegerArray constructor
     * @throws ReviewedStingException if log is null
     */
    public LoggingNestedIntegerArray( final PrintStream log, final String logEntryLabel, final int... dimensions ) {
        super(dimensions);

        if ( log == null ) {
            throw new ReviewedStingException("Log output stream must not be null");
        }
        this.log = log;
        this.logEntryLabel = logEntryLabel != null ? logEntryLabel : "";
    }

    /**
     * Stores the value via the superclass, then writes a tab-separated log entry of the form
     * LABEL VALUE KEY1 KEY2 ... KEY_N to the log stream.
     *
     * @param value value to store; written to the log using its String conversion
     * @param keys keys identifying the cell to update, logged in the order given
     */
    @Override
    public void put( final T value, final int... keys ) {
        // Update first, so nothing is logged if the superclass rejects the put by throwing
        super.put(value, keys);

        final StringBuilder logEntry = new StringBuilder(logEntryLabel);
        logEntry.append('\t').append(value);
        for ( final int key : keys ) {
            logEntry.append('\t').append(key);
        }

        // The whole entry goes out in a single println call.
        // PrintStream methods all use synchronized blocks internally, so our logging is thread-safe
        log.println(logEntry.toString());
    }
}

View File

@ -25,9 +25,12 @@
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.utils.collections.LoggingNestedIntegerArray;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import java.io.PrintStream;
/**
* Utility class to facilitate on-the-fly base quality score recalibration.
*
@ -52,19 +55,31 @@ public class RecalibrationTables {
private final NestedIntegerArray[] tables;
public RecalibrationTables(final Covariate[] covariates) {
this(covariates, covariates[TableType.READ_GROUP_TABLE.index].maximumKeyValue() + 1);
this(covariates, covariates[TableType.READ_GROUP_TABLE.index].maximumKeyValue() + 1, null);
}
/**
 * Delegates to the (covariates, numReadGroups, log) constructor, using
 * covariates[TableType.READ_GROUP_TABLE.index].maximumKeyValue() + 1 as the number of read groups.
 *
 * @param covariates covariates to build tables for; the entry at TableType.READ_GROUP_TABLE.index supplies the read group count
 * @param log stream passed through to the three-argument constructor
 */
public RecalibrationTables(final Covariate[] covariates, final PrintStream log) {
this(covariates, covariates[TableType.READ_GROUP_TABLE.index].maximumKeyValue() + 1, log);
}
/**
 * Delegates to the (covariates, numReadGroups, log) constructor with a null log.
 *
 * @param covariates covariates to build tables for
 * @param numReadGroups number of read groups, passed through unchanged
 */
public RecalibrationTables(final Covariate[] covariates, final int numReadGroups) {
this(covariates, numReadGroups, null);
}
public RecalibrationTables(final Covariate[] covariates, final int numReadGroups, final PrintStream log) {
tables = new NestedIntegerArray[covariates.length];
final int qualDimension = covariates[TableType.QUALITY_SCORE_TABLE.index].maximumKeyValue() + 1;
final int eventDimension = EventType.values().length;
tables[TableType.READ_GROUP_TABLE.index] = new NestedIntegerArray<RecalDatum>(numReadGroups, eventDimension);
tables[TableType.QUALITY_SCORE_TABLE.index] = new NestedIntegerArray<RecalDatum>(numReadGroups, qualDimension, eventDimension);
tables[TableType.READ_GROUP_TABLE.index] = log == null ? new NestedIntegerArray<RecalDatum>(numReadGroups, eventDimension) :
new LoggingNestedIntegerArray<RecalDatum>(log, "READ_GROUP_TABLE", numReadGroups, eventDimension);
tables[TableType.QUALITY_SCORE_TABLE.index] = log == null ? new NestedIntegerArray<RecalDatum>(numReadGroups, qualDimension, eventDimension) :
new LoggingNestedIntegerArray<RecalDatum>(log, "QUALITY_SCORE_TABLE", numReadGroups, qualDimension, eventDimension);
for (int i = TableType.OPTIONAL_COVARIATE_TABLES_START.index; i < covariates.length; i++)
tables[i] = new NestedIntegerArray<RecalDatum>(numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension);
tables[i] = log == null ? new NestedIntegerArray<RecalDatum>(numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension) :
new LoggingNestedIntegerArray<RecalDatum>(log, String.format("OPTIONAL_COVARIATE_TABLE_%d", i - TableType.OPTIONAL_COVARIATE_TABLES_START.index + 1),
numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension);
}
public NestedIntegerArray<RecalDatum> getReadGroupTable() {