Bad news folks: BQSR scatter-gather was totally busted; you absolutely cannot trust any BQSR table that was a product of SG (for any version of BQSR). I fixed BQSR-gathering, rewrote (and enabled) the unit test, and confirmed that outputs are now identical whether or not SG is used to create the table.
This commit is contained in:
parent
2e6f533996
commit
1316b579f0
|
|
@ -77,7 +77,7 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
|
|||
final byte qual = tempQualArray[eventIndex];
|
||||
final boolean isError = tempErrorArray[eventIndex];
|
||||
|
||||
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
||||
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getReadGroupTable();
|
||||
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
|
||||
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
|
||||
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
|
||||
|
|
@ -85,7 +85,7 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
|
|||
else
|
||||
rgPreviousDatum.combine(rgThisDatum);
|
||||
|
||||
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
|
||||
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getQualityScoreTable();
|
||||
final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex);
|
||||
if (qualPreviousDatum == null)
|
||||
qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex);
|
||||
|
|
@ -124,7 +124,7 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
|
|||
final byte qual = tempQualArray[eventIndex];
|
||||
final double isError = tempFractionalErrorArray[eventIndex];
|
||||
|
||||
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
||||
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getReadGroupTable();
|
||||
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
|
||||
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
|
||||
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
|
||||
|
|
@ -132,7 +132,7 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
|
|||
else
|
||||
rgPreviousDatum.combine(rgThisDatum);
|
||||
|
||||
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
|
||||
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getQualityScoreTable();
|
||||
final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex);
|
||||
if (qualPreviousDatum == null)
|
||||
qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex);
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
|
|||
final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION);
|
||||
final int eventIndex = EventType.BASE_SUBSTITUTION.index;
|
||||
|
||||
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
||||
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getReadGroupTable();
|
||||
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
|
||||
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
|
||||
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
|
||||
|
|
@ -73,7 +73,7 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
|
|||
else
|
||||
rgPreviousDatum.combine(rgThisDatum);
|
||||
|
||||
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
|
||||
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getQualityScoreTable();
|
||||
final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex);
|
||||
if (qualPreviousDatum == null)
|
||||
qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex);
|
||||
|
|
|
|||
|
|
@ -166,8 +166,8 @@ public class BaseRecalibration {
|
|||
private byte performSequentialQualityCalculation(final int[] key, final EventType errorModel) {
|
||||
|
||||
final byte qualFromRead = (byte)(long)key[1];
|
||||
final double globalDeltaQ = calculateGlobalDeltaQ(recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE), key, errorModel);
|
||||
final double deltaQReported = calculateDeltaQReported(recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE), key, errorModel, globalDeltaQ, qualFromRead);
|
||||
final double globalDeltaQ = calculateGlobalDeltaQ(recalibrationTables.getReadGroupTable(), key, errorModel);
|
||||
final double deltaQReported = calculateDeltaQReported(recalibrationTables.getQualityScoreTable(), key, errorModel, globalDeltaQ, qualFromRead);
|
||||
final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables, key, errorModel, globalDeltaQ, deltaQReported, qualFromRead);
|
||||
|
||||
double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ public class QuantizationInfo {
|
|||
for (int i = 0; i < qualHistogram.length; i++)
|
||||
qualHistogram[i] = 0L;
|
||||
|
||||
final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); // get the quality score table
|
||||
final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getQualityScoreTable(); // get the quality score table
|
||||
|
||||
for (final RecalDatum value : qualTable.getAllValues()) {
|
||||
final RecalDatum datum = value;
|
||||
|
|
|
|||
|
|
@ -68,6 +68,7 @@ public class RecalUtils {
|
|||
public final static String QUALITY_SCORE_REPORT_TABLE_TITLE = "RecalTable1";
|
||||
public final static String ALL_COVARIATES_REPORT_TABLE_TITLE = "RecalTable2";
|
||||
|
||||
public final static String ARGUMENT_COLUMN_NAME = "Argument";
|
||||
public final static String ARGUMENT_VALUE_COLUMN_NAME = "Value";
|
||||
public final static String QUANTIZED_VALUE_COLUMN_NAME = "QuantizedScore";
|
||||
public static final String QUANTIZED_COUNT_COLUMN_NAME = "Count";
|
||||
|
|
@ -399,7 +400,7 @@ public class RecalUtils {
|
|||
final NestedHashMap deltaTable = new NestedHashMap();
|
||||
|
||||
// add the quality score table to the delta table
|
||||
final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
|
||||
final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getQualityScoreTable();
|
||||
for (final NestedIntegerArray.Leaf leaf : qualTable.getAllLeaves()) { // go through every element in the covariates table to create the delta table
|
||||
final int[] newCovs = new int[4];
|
||||
newCovs[0] = leaf.keys[0];
|
||||
|
|
|
|||
|
|
@ -61,9 +61,9 @@ public class RecalibrationReport {
|
|||
|
||||
recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE)));
|
||||
|
||||
parseReadGroupTable(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE));
|
||||
parseReadGroupTable(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getReadGroupTable());
|
||||
|
||||
parseQualityScoreTable(report.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE));
|
||||
parseQualityScoreTable(report.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getQualityScoreTable());
|
||||
|
||||
parseAllCovariatesTable(report.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE), recalibrationTables);
|
||||
|
||||
|
|
@ -106,9 +106,9 @@ public class RecalibrationReport {
|
|||
*/
|
||||
public void combine(final RecalibrationReport other) {
|
||||
|
||||
for (RecalibrationTables.TableType type : RecalibrationTables.TableType.values()) {
|
||||
final NestedIntegerArray<RecalDatum> myTable = recalibrationTables.getTable(type);
|
||||
final NestedIntegerArray<RecalDatum> otherTable = other.recalibrationTables.getTable(type);
|
||||
for ( int tableIndex = 0; tableIndex < recalibrationTables.numTables(); tableIndex++ ) {
|
||||
final NestedIntegerArray<RecalDatum> myTable = recalibrationTables.getTable(tableIndex);
|
||||
final NestedIntegerArray<RecalDatum> otherTable = other.recalibrationTables.getTable(tableIndex);
|
||||
|
||||
for (final NestedIntegerArray.Leaf row : otherTable.getAllLeaves()) {
|
||||
final RecalDatum myDatum = myTable.get(row.keys);
|
||||
|
|
|
|||
|
|
@ -67,8 +67,12 @@ public class RecalibrationTables {
|
|||
tables[i] = new NestedIntegerArray<RecalDatum>(numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension);
|
||||
}
|
||||
|
||||
public NestedIntegerArray<RecalDatum> getTable(final TableType type) {
|
||||
return (NestedIntegerArray<RecalDatum>)tables[type.index];
|
||||
public NestedIntegerArray<RecalDatum> getReadGroupTable() {
|
||||
return (NestedIntegerArray<RecalDatum>)tables[TableType.READ_GROUP_TABLE.index];
|
||||
}
|
||||
|
||||
public NestedIntegerArray<RecalDatum> getQualityScoreTable() {
|
||||
return (NestedIntegerArray<RecalDatum>)tables[TableType.QUALITY_SCORE_TABLE.index];
|
||||
}
|
||||
|
||||
public NestedIntegerArray<RecalDatum> getTable(final int index) {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
|
||||
|
|
@ -7,49 +8,70 @@ import org.testng.Assert;
|
|||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author Mauricio Carneiro
|
||||
* @since 3/7/12
|
||||
* @author Eric Banks
|
||||
* @since 9/20/12
|
||||
*/
|
||||
public class BQSRGathererUnitTest {
|
||||
RecalibrationArgumentCollection RAC;
|
||||
public class BQSRGathererUnitTest extends BaseTest {
|
||||
|
||||
private static File recal = new File("public/testdata/exampleGRP.grp");
|
||||
private static File recal1 = new File(privateTestDir + "HiSeq.1mb.1RG.sg1.table");
|
||||
private static File recal2 = new File(privateTestDir + "HiSeq.1mb.1RG.sg2.table");
|
||||
private static File recal3 = new File(privateTestDir + "HiSeq.1mb.1RG.sg3.table");
|
||||
private static File recal4 = new File(privateTestDir + "HiSeq.1mb.1RG.sg4.table");
|
||||
private static File recal5 = new File(privateTestDir + "HiSeq.1mb.1RG.sg5.table");
|
||||
|
||||
//todo -- this test doesnt work because the primary keys in different tables are not the same. Need to either implement "sort" for testing purposes on GATKReport or have a sophisticated comparison measure
|
||||
@Test(enabled = false)
|
||||
public void testCombineSimilarFiles() {
|
||||
private static File recal_original = new File(privateTestDir + "HiSeq.1mb.1RG.noSG.table");
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testGatherBQSR() {
|
||||
BQSRGatherer gatherer = new BQSRGatherer();
|
||||
List<File> recalFiles = new LinkedList<File> ();
|
||||
File output = new File("foo.grp");
|
||||
recalFiles.add(recal);
|
||||
recalFiles.add(recal);
|
||||
final File output = BaseTest.createTempFile("BQSRgathererTest", ".table");
|
||||
|
||||
recalFiles.add(recal1);
|
||||
recalFiles.add(recal2);
|
||||
recalFiles.add(recal3);
|
||||
recalFiles.add(recal4);
|
||||
recalFiles.add(recal5);
|
||||
gatherer.gather(recalFiles, output);
|
||||
|
||||
GATKReport originalReport = new GATKReport(recal);
|
||||
GATKReport calculatedReport = new GATKReport(output);
|
||||
for (GATKReportTable originalTable : originalReport.getTables()) {
|
||||
GATKReportTable calculatedTable = calculatedReport.getTable(originalTable.getTableName());
|
||||
List<String> columnsToTest = new LinkedList<String>();
|
||||
columnsToTest.add(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME);
|
||||
columnsToTest.add(RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
|
||||
if (originalTable.getTableName().equals(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE)) { // these tables must be IDENTICAL
|
||||
columnsToTest.add(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
|
||||
testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 1);
|
||||
}
|
||||
|
||||
else if (originalTable.getTableName().equals(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE)) {
|
||||
columnsToTest.add(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME);
|
||||
testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 2);
|
||||
}
|
||||
|
||||
else if (originalTable.getTableName().startsWith("RecalTable")) {
|
||||
testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 2);
|
||||
}
|
||||
}
|
||||
GATKReport originalReport = new GATKReport(recal_original);
|
||||
GATKReport calculatedReport = new GATKReport(output);
|
||||
|
||||
|
||||
// test the Arguments table
|
||||
List<String> columnsToTest = Arrays.asList(RecalUtils.ARGUMENT_COLUMN_NAME, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
|
||||
GATKReportTable originalTable = originalReport.getTable(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE);
|
||||
GATKReportTable calculatedTable = calculatedReport.getTable(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE);
|
||||
testTablesWithColumns(originalTable, calculatedTable, columnsToTest);
|
||||
|
||||
// test the Quantized table
|
||||
columnsToTest = Arrays.asList(RecalUtils.QUALITY_SCORE_COLUMN_NAME, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME);
|
||||
originalTable = originalReport.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE);
|
||||
calculatedTable = calculatedReport.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE);
|
||||
testTablesWithColumns(originalTable, calculatedTable, columnsToTest);
|
||||
|
||||
// test the RecalTable0 table
|
||||
columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
|
||||
originalTable = originalReport.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE);
|
||||
calculatedTable = calculatedReport.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE);
|
||||
testTablesWithColumns(originalTable, calculatedTable, columnsToTest);
|
||||
|
||||
// test the RecalTable1 table
|
||||
columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.QUALITY_SCORE_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
|
||||
originalTable = originalReport.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE);
|
||||
calculatedTable = calculatedReport.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE);
|
||||
testTablesWithColumns(originalTable, calculatedTable, columnsToTest);
|
||||
|
||||
// test the RecalTable2 table
|
||||
columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.QUALITY_SCORE_COLUMN_NAME, RecalUtils.COVARIATE_VALUE_COLUMN_NAME, RecalUtils.COVARIATE_NAME_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
|
||||
originalTable = originalReport.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE);
|
||||
calculatedTable = calculatedReport.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE);
|
||||
testTablesWithColumns(originalTable, calculatedTable, columnsToTest);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -58,25 +80,12 @@ public class BQSRGathererUnitTest {
|
|||
* @param original the original table
|
||||
* @param calculated the calculated table
|
||||
* @param columnsToTest list of columns to test. All columns will be tested with the same criteria (equality given factor)
|
||||
* @param factor 1 to test for equality, any other value to multiply the original value and match with the calculated
|
||||
*/
|
||||
private void testTablesWithColumnsAndFactor(GATKReportTable original, GATKReportTable calculated, List<String> columnsToTest, int factor) {
|
||||
private void testTablesWithColumns(GATKReportTable original, GATKReportTable calculated, List<String> columnsToTest) {
|
||||
for (int row = 0; row < original.getNumRows(); row++ ) {
|
||||
for (String column : columnsToTest) {
|
||||
Object actual = calculated.get(new Integer(row), column);
|
||||
Object expected = original.get(row, column);
|
||||
|
||||
if (factor != 1) {
|
||||
if (expected instanceof Double)
|
||||
expected = (Double) expected * factor;
|
||||
else if (expected instanceof Long)
|
||||
expected = (Long) expected * factor;
|
||||
else if (expected instanceof Integer)
|
||||
expected = (Integer) expected * factor;
|
||||
else if (expected instanceof Byte) {
|
||||
expected = (Byte) expected * factor;
|
||||
}
|
||||
}
|
||||
Assert.assertEquals(actual, expected, "Row: " + row + " Original Table: " + original.getTableName() + " Calc Table: " + calculated.getTableName());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -76,8 +76,8 @@ public class RecalibrationReportUnitTest {
|
|||
final ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates);
|
||||
|
||||
final RecalibrationTables recalibrationTables = new RecalibrationTables(requestedCovariates);
|
||||
final NestedIntegerArray<RecalDatum> rgTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
||||
final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
|
||||
final NestedIntegerArray<RecalDatum> rgTable = recalibrationTables.getReadGroupTable();
|
||||
final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getQualityScoreTable();
|
||||
|
||||
for (int offset = 0; offset < length; offset++) {
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue