diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index bc7c5a1ce..f3d4a3096 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -188,7 +188,7 @@ public class BaseRecalibration { } } - protected double hierarchicalBayesianQualityEstimate( final double epsilon, final RecalDatum empiricalQualRG, final RecalDatum empiricalQualQS, final List empiricalQualCovs ) { + protected static double hierarchicalBayesianQualityEstimate( final double epsilon, final RecalDatum empiricalQualRG, final RecalDatum empiricalQualQS, final List empiricalQualCovs ) { final double globalDeltaQ = ( empiricalQualRG == null ? 0.0 : empiricalQualRG.getEmpiricalQuality(epsilon) - epsilon ); final double deltaQReported = ( empiricalQualQS == null ? 0.0 : empiricalQualQS.getEmpiricalQuality(globalDeltaQ + epsilon) - (globalDeltaQ + epsilon) ); double deltaQCovariates = 0.0; diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java index 0b4441439..bd0063a7d 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java @@ -181,6 +181,7 @@ public class RecalDatum { if ( Double.isNaN(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is NaN"); this.estimatedQReported = estimatedQReported; + empiricalQuality = UNINITIALIZED; } public final double getEstimatedQReported() { diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java index 2f9a38972..f10c26ddc 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java @@ -131,7 +131,7 @@ public class RecalibrationReport { * Combines two recalibration reports by adding all observations and errors * * Note: This method DOES NOT recalculate the empirical qualities and quantized qualities. You have to recalculate - * them after combining. The reason for not calculating it is because this function is inteded for combining a + * them after combining. The reason for not calculating it is because this function is intended for combining a * series of recalibration reports, and it only makes sense to calculate the empirical qualities and quantized * qualities after all the recalibration reports have been combined. Having the user recalculate when appropriate, * makes this method faster diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java index 2815599d9..f82f24439 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java @@ -125,19 +125,19 @@ public class BQSRGathererUnitTest extends BaseTest { testTablesWithColumns(originalTable, calculatedTable, columnsToTest); // test the RecalTable0 table - columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME); + columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME); originalTable = originalReport.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE); calculatedTable = calculatedReport.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE); testTablesWithColumns(originalTable, calculatedTable, columnsToTest); // test the RecalTable1 table - columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.QUALITY_SCORE_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME); + columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.QUALITY_SCORE_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME); originalTable = originalReport.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE); calculatedTable = calculatedReport.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE); testTablesWithColumns(originalTable, calculatedTable, columnsToTest); // test the RecalTable2 table - columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.QUALITY_SCORE_COLUMN_NAME, RecalUtils.COVARIATE_VALUE_COLUMN_NAME, RecalUtils.COVARIATE_NAME_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME); + columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.QUALITY_SCORE_COLUMN_NAME, RecalUtils.COVARIATE_VALUE_COLUMN_NAME, RecalUtils.COVARIATE_NAME_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME); originalTable = originalReport.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE); calculatedTable = calculatedReport.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE); testTablesWithColumns(originalTable, calculatedTable, columnsToTest); diff --git a/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java index 09f751fbc..37f0fcc0c 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java @@ -58,6 +58,7 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.util.Arrays; +import java.util.Collections; public class RecalDatumUnitTest extends BaseTest { @@ -277,4 +278,36 @@ public class RecalDatumUnitTest extends BaseTest { Assert.assertFalse(Double.isInfinite(log10likelihood)); Assert.assertFalse(Double.isNaN(log10likelihood)); } + + @Test + public void basicHierarchicalBayesianQualityEstimateTest() { + { + double RG_Q = 45.0; + RecalDatum RG = new RecalDatum( (long)10000000, (long) (10000000 * 1.0 / (Math.pow(10.0, RG_Q/10.0))), (byte)RG_Q); + double Q = 30.0; + RecalDatum QS = new RecalDatum( (long)10000000, (long) (10000000 * 1.0 / (Math.pow(10.0, Q/10.0))), (byte)Q); + RecalDatum COV = new RecalDatum( (long)150, (long) 1, (byte)45.0); // no data here so Bayesian prior has a huge effect on the empirical quality + + Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( 45.0, RG, QS, Collections.singletonList(COV)), Q, 1E-4 ); + // initial epsilon condition shouldn't matter when there are a lot of observations + Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( 25.0, RG, QS, Collections.singletonList(COV)), Q, 1E-4 ); + Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( 5.0, RG, QS, Collections.singletonList(COV)), Q, 1E-4 ); + Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( 65.0, RG, QS, Collections.singletonList(COV)), Q, 1E-4 ); + } + + { + double RG_Q = 45.0; + RecalDatum RG = new RecalDatum( (long)100, (long) (100 * 1.0 / (Math.pow(10.0, RG_Q/10.0))), (byte)RG_Q); + double Q = 30.0; + RecalDatum QS = new RecalDatum( (long)100, (long) (100 * 1.0 / (Math.pow(10.0, Q/10.0))), (byte)Q); + RecalDatum COV = new RecalDatum( (long)150, (long) 1, (byte)45.0); // no data here so Bayesian prior has a huge effect on the empirical quality + + // initial epsilon condition dominates when there is no data + Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( 45.0, RG, QS, Collections.singletonList(COV)), 45.0, 1E-4 ); + Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( 25.0, RG, QS, Collections.singletonList(COV)), 25.0, 1E-4 ); + Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( 5.0, RG, QS, Collections.singletonList(COV)), 5.0, 1E-4 ); + Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( 65.0, RG, QS, Collections.singletonList(COV)), 65.0, 1E-4 ); + } + + } } \ No newline at end of file