diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java index ff1754a10..b89f68e24 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java @@ -77,7 +77,7 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp final byte qual = tempQualArray[eventIndex]; final boolean isError = tempErrorArray[eventIndex]; - final NestedIntegerArray rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); + final NestedIntegerArray rgRecalTable = recalibrationTables.getReadGroupTable(); final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex); final RecalDatum rgThisDatum = createDatumObject(qual, isError); if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it @@ -85,7 +85,7 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp else rgPreviousDatum.combine(rgThisDatum); - final NestedIntegerArray qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); + final NestedIntegerArray qualRecalTable = recalibrationTables.getQualityScoreTable(); final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex); if (qualPreviousDatum == null) qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex); @@ -124,7 +124,7 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp final byte qual = tempQualArray[eventIndex]; final double isError = tempFractionalErrorArray[eventIndex]; - final NestedIntegerArray rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); + final NestedIntegerArray rgRecalTable = recalibrationTables.getReadGroupTable(); final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex); final RecalDatum rgThisDatum = createDatumObject(qual, isError); if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it @@ -132,7 +132,7 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp else rgPreviousDatum.combine(rgThisDatum); - final NestedIntegerArray qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); + final NestedIntegerArray qualRecalTable = recalibrationTables.getQualityScoreTable(); final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex); if (qualPreviousDatum == null) qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java index 2b0f8ca80..4fe9c5323 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java @@ -65,7 +65,7 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION); final int eventIndex = EventType.BASE_SUBSTITUTION.index; - final NestedIntegerArray rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); + final NestedIntegerArray rgRecalTable = recalibrationTables.getReadGroupTable(); final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex); final RecalDatum rgThisDatum = createDatumObject(qual, isError); if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it @@ -73,7 +73,7 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP else rgPreviousDatum.combine(rgThisDatum); - final NestedIntegerArray qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); + final NestedIntegerArray qualRecalTable = recalibrationTables.getQualityScoreTable(); final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex); if (qualPreviousDatum == null) qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex); diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index 0af7deec4..7ad9302a8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -166,8 +166,8 @@ public class BaseRecalibration { private byte performSequentialQualityCalculation(final int[] key, final EventType errorModel) { final byte qualFromRead = (byte)(long)key[1]; - final double globalDeltaQ = calculateGlobalDeltaQ(recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE), key, errorModel); - final double deltaQReported = calculateDeltaQReported(recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE), key, errorModel, globalDeltaQ, qualFromRead); + final double globalDeltaQ = calculateGlobalDeltaQ(recalibrationTables.getReadGroupTable(), key, errorModel); + final double deltaQReported = calculateDeltaQReported(recalibrationTables.getQualityScoreTable(), key, errorModel, globalDeltaQ, qualFromRead); final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables, key, errorModel, globalDeltaQ, deltaQReported, qualFromRead); double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java index d3c6c3d83..f3644fdd8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java @@ -34,7 +34,7 @@ public class QuantizationInfo { for (int i = 0; i < qualHistogram.length; i++) qualHistogram[i] = 0L; - final NestedIntegerArray qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); // get the quality score table + final NestedIntegerArray qualTable = recalibrationTables.getQualityScoreTable(); // get the quality score table for (final RecalDatum value : qualTable.getAllValues()) { final RecalDatum datum = value; diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java index 1eac6e7e8..7e90d98b9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java @@ -68,6 +68,7 @@ public class RecalUtils { public final static String QUALITY_SCORE_REPORT_TABLE_TITLE = "RecalTable1"; public final static String ALL_COVARIATES_REPORT_TABLE_TITLE = "RecalTable2"; + public final static String ARGUMENT_COLUMN_NAME = "Argument"; public final static String ARGUMENT_VALUE_COLUMN_NAME = "Value"; public final static String QUANTIZED_VALUE_COLUMN_NAME = "QuantizedScore"; public static final String QUANTIZED_COUNT_COLUMN_NAME = "Count"; @@ -399,7 +400,7 @@ public class RecalUtils { final NestedHashMap deltaTable = new NestedHashMap(); // add the quality score table to the delta table - final NestedIntegerArray qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); + final NestedIntegerArray qualTable = recalibrationTables.getQualityScoreTable(); for (final NestedIntegerArray.Leaf leaf : qualTable.getAllLeaves()) { // go through every element in the covariates table to create the delta table final int[] newCovs = new int[4]; newCovs[0] = leaf.keys[0]; diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java index c7ad3ea1d..527306c85 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java @@ -61,9 +61,9 @@ public class RecalibrationReport { recalibrationTables = new RecalibrationTables(requestedCovariates, countReadGroups(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE))); - parseReadGroupTable(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE)); + parseReadGroupTable(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getReadGroupTable()); - parseQualityScoreTable(report.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE)); + parseQualityScoreTable(report.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getQualityScoreTable()); parseAllCovariatesTable(report.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE), recalibrationTables); @@ -106,9 +106,9 @@ public class RecalibrationReport { */ public void combine(final RecalibrationReport other) { - for (RecalibrationTables.TableType type : RecalibrationTables.TableType.values()) { - final NestedIntegerArray myTable = recalibrationTables.getTable(type); - final NestedIntegerArray otherTable = other.recalibrationTables.getTable(type); + for ( int tableIndex = 0; tableIndex < recalibrationTables.numTables(); tableIndex++ ) { + final NestedIntegerArray myTable = recalibrationTables.getTable(tableIndex); + final NestedIntegerArray otherTable = other.recalibrationTables.getTable(tableIndex); for (final NestedIntegerArray.Leaf row : otherTable.getAllLeaves()) { final RecalDatum myDatum = myTable.get(row.keys); diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java index f37e69c9a..afc8f5065 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java @@ -67,8 +67,12 @@ public class RecalibrationTables { tables[i] = new NestedIntegerArray(numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension); } - public NestedIntegerArray getTable(final TableType type) { - return (NestedIntegerArray)tables[type.index]; + public NestedIntegerArray getReadGroupTable() { + return (NestedIntegerArray)tables[TableType.READ_GROUP_TABLE.index]; + } + + public NestedIntegerArray getQualityScoreTable() { + return (NestedIntegerArray)tables[TableType.QUALITY_SCORE_TABLE.index]; } public NestedIntegerArray getTable(final int index) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java index f1ffbe80f..220ffa1e1 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; +import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.utils.recalibration.RecalUtils; @@ -7,49 +8,70 @@ import org.testng.Assert; import org.testng.annotations.Test; import java.io.File; +import java.util.Arrays; import java.util.LinkedList; import java.util.List; /** - * @author Mauricio Carneiro - * @since 3/7/12 + * @author Eric Banks + * @since 9/20/12 */ -public class BQSRGathererUnitTest { - RecalibrationArgumentCollection RAC; +public class BQSRGathererUnitTest extends BaseTest { - private static File recal = new File("public/testdata/exampleGRP.grp"); + private static File recal1 = new File(privateTestDir + "HiSeq.1mb.1RG.sg1.table"); + private static File recal2 = new File(privateTestDir + "HiSeq.1mb.1RG.sg2.table"); + private static File recal3 = new File(privateTestDir + "HiSeq.1mb.1RG.sg3.table"); + private static File recal4 = new File(privateTestDir + "HiSeq.1mb.1RG.sg4.table"); + private static File recal5 = new File(privateTestDir + "HiSeq.1mb.1RG.sg5.table"); - //todo -- this test doesnt work because the primary keys in different tables are not the same. Need to either implement "sort" for testing purposes on GATKReport or have a sophisticated comparison measure - @Test(enabled = false) - public void testCombineSimilarFiles() { + private static File recal_original = new File(privateTestDir + "HiSeq.1mb.1RG.noSG.table"); + + @Test(enabled = true) + public void testGatherBQSR() { BQSRGatherer gatherer = new BQSRGatherer(); List recalFiles = new LinkedList (); - File output = new File("foo.grp"); - recalFiles.add(recal); - recalFiles.add(recal); + final File output = BaseTest.createTempFile("BQSRgathererTest", ".table"); + + recalFiles.add(recal1); + recalFiles.add(recal2); + recalFiles.add(recal3); + recalFiles.add(recal4); + recalFiles.add(recal5); gatherer.gather(recalFiles, output); - GATKReport originalReport = new GATKReport(recal); - GATKReport calculatedReport = new GATKReport(output); - for (GATKReportTable originalTable : originalReport.getTables()) { - GATKReportTable calculatedTable = calculatedReport.getTable(originalTable.getTableName()); - List columnsToTest = new LinkedList(); - columnsToTest.add(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME); - columnsToTest.add(RecalUtils.NUMBER_ERRORS_COLUMN_NAME); - if (originalTable.getTableName().equals(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE)) { // these tables must be IDENTICAL - columnsToTest.add(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME); - testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 1); - } - - else if (originalTable.getTableName().equals(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE)) { - columnsToTest.add(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME); - testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 2); - } - - else if (originalTable.getTableName().startsWith("RecalTable")) { - testTablesWithColumnsAndFactor(originalTable, calculatedTable, columnsToTest, 2); - } - } + GATKReport originalReport = new GATKReport(recal_original); + GATKReport calculatedReport = new GATKReport(output); + + + // test the Arguments table + List columnsToTest = Arrays.asList(RecalUtils.ARGUMENT_COLUMN_NAME, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME); + GATKReportTable originalTable = originalReport.getTable(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE); + GATKReportTable calculatedTable = calculatedReport.getTable(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE); + testTablesWithColumns(originalTable, calculatedTable, columnsToTest); + + // test the Quantized table + columnsToTest = Arrays.asList(RecalUtils.QUALITY_SCORE_COLUMN_NAME, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME); + originalTable = originalReport.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE); + calculatedTable = calculatedReport.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE); + testTablesWithColumns(originalTable, calculatedTable, columnsToTest); + + // test the RecalTable0 table + columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME); + originalTable = originalReport.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE); + calculatedTable = calculatedReport.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE); + testTablesWithColumns(originalTable, calculatedTable, columnsToTest); + + // test the RecalTable1 table + columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.QUALITY_SCORE_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME); + originalTable = originalReport.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE); + calculatedTable = calculatedReport.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE); + testTablesWithColumns(originalTable, calculatedTable, columnsToTest); + + // test the RecalTable2 table + columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.QUALITY_SCORE_COLUMN_NAME, RecalUtils.COVARIATE_VALUE_COLUMN_NAME, RecalUtils.COVARIATE_NAME_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME); + originalTable = originalReport.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE); + calculatedTable = calculatedReport.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE); + testTablesWithColumns(originalTable, calculatedTable, columnsToTest); } /** @@ -58,25 +80,12 @@ public class BQSRGathererUnitTest { * @param original the original table * @param calculated the calculated table * @param columnsToTest list of columns to test. All columns will be tested with the same criteria (equality given factor) - * @param factor 1 to test for equality, any other value to multiply the original value and match with the calculated */ - private void testTablesWithColumnsAndFactor(GATKReportTable original, GATKReportTable calculated, List columnsToTest, int factor) { + private void testTablesWithColumns(GATKReportTable original, GATKReportTable calculated, List columnsToTest) { for (int row = 0; row < original.getNumRows(); row++ ) { for (String column : columnsToTest) { Object actual = calculated.get(new Integer(row), column); Object expected = original.get(row, column); - - if (factor != 1) { - if (expected instanceof Double) - expected = (Double) expected * factor; - else if (expected instanceof Long) - expected = (Long) expected * factor; - else if (expected instanceof Integer) - expected = (Integer) expected * factor; - else if (expected instanceof Byte) { - expected = (Byte) expected * factor; - } - } Assert.assertEquals(actual, expected, "Row: " + row + " Original Table: " + original.getTableName() + " Calc Table: " + calculated.getTableName()); } } diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java index 485da243f..d597b9f2c 100644 --- a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java @@ -76,8 +76,8 @@ public class RecalibrationReportUnitTest { final ReadCovariates rc = RecalUtils.computeCovariates(read, requestedCovariates); final RecalibrationTables recalibrationTables = new RecalibrationTables(requestedCovariates); - final NestedIntegerArray rgTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE); - final NestedIntegerArray qualTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE); + final NestedIntegerArray rgTable = recalibrationTables.getReadGroupTable(); + final NestedIntegerArray qualTable = recalibrationTables.getQualityScoreTable(); for (int offset = 0; offset < length; offset++) {