Adding unit tests for hierarchicalBayesianQualityEstimate function

This commit is contained in:
Ryan Poplin 2013-01-30 13:26:07 -05:00
parent 07fe3dd1ef
commit 85dabd321f
2 changed files with 17 additions and 18 deletions

View File

@ -46,6 +46,7 @@
package org.broadinstitute.sting.utils.recalibration;
import com.google.java.contract.Ensures;
import net.sf.samtools.SAMTag;
import net.sf.samtools.SAMUtils;
import org.apache.log4j.Logger;
@ -188,6 +189,7 @@ public class BaseRecalibration {
}
}
@Ensures("result > 0.0")
protected static double hierarchicalBayesianQualityEstimate( final double epsilon, final RecalDatum empiricalQualRG, final RecalDatum empiricalQualQS, final List<RecalDatum> empiricalQualCovs ) {
final double globalDeltaQ = ( empiricalQualRG == null ? 0.0 : empiricalQualRG.getEmpiricalQuality(epsilon) - epsilon );
final double deltaQReported = ( empiricalQualQS == null ? 0.0 : empiricalQualQS.getEmpiricalQuality(globalDeltaQ + epsilon) - (globalDeltaQ + epsilon) );

View File

@ -50,6 +50,7 @@ package org.broadinstitute.sting.utils.recalibration;
// the imports for unit testing.
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
@ -57,6 +58,7 @@ import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@ -281,32 +283,27 @@ public class RecalDatumUnitTest extends BaseTest {
@Test
public void basicHierarchicalBayesianQualityEstimateTest() {
{
double RG_Q = 45.0;
RecalDatum RG = new RecalDatum( (long)10000000, (long) (10000000 * 1.0 / (Math.pow(10.0, RG_Q/10.0))), (byte)RG_Q);
double Q = 30.0;
RecalDatum QS = new RecalDatum( (long)10000000, (long) (10000000 * 1.0 / (Math.pow(10.0, Q/10.0))), (byte)Q);
RecalDatum COV = new RecalDatum( (long)150, (long) 1, (byte)45.0); // no data here so Bayesian prior has a huge effect on the empirical quality
Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( 45.0, RG, QS, Collections.singletonList(COV)), Q, 1E-4 );
for( double epsilon = 15.0; epsilon <= 60.0; epsilon += 2.0 ) {
double RG_Q = 45.0;
RecalDatum RG = new RecalDatum( (long)100000000, (long) (100000000 * 1.0 / (Math.pow(10.0, RG_Q/10.0))), (byte)RG_Q);
double Q = 30.0;
RecalDatum QS = new RecalDatum( (long)100000000, (long) (100000000 * 1.0 / (Math.pow(10.0, Q/10.0))), (byte)Q);
RecalDatum COV = new RecalDatum( (long)15, (long) 1, (byte)45.0); // no data here so Bayesian prior has a huge effect on the empirical quality
// initial epsilon condition shouldn't matter when there are a lot of observations
Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( 25.0, RG, QS, Collections.singletonList(COV)), Q, 1E-4 );
Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( 5.0, RG, QS, Collections.singletonList(COV)), Q, 1E-4 );
Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( 65.0, RG, QS, Collections.singletonList(COV)), Q, 1E-4 );
Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( epsilon, RG, QS, Collections.singletonList(COV)), Q, 1E-4 );
}
{
for( double epsilon = 15.0; epsilon <= 60.0; epsilon += 2.0 ) {
double RG_Q = 45.0;
RecalDatum RG = new RecalDatum( (long)100, (long) (100 * 1.0 / (Math.pow(10.0, RG_Q/10.0))), (byte)RG_Q);
RecalDatum RG = new RecalDatum( (long)10, (long) (10 * 1.0 / (Math.pow(10.0, RG_Q/10.0))), (byte)RG_Q);
double Q = 30.0;
RecalDatum QS = new RecalDatum( (long)100, (long) (100 * 1.0 / (Math.pow(10.0, Q/10.0))), (byte)Q);
RecalDatum COV = new RecalDatum( (long)150, (long) 1, (byte)45.0); // no data here so Bayesian prior has a huge effect on the empirical quality
RecalDatum QS = new RecalDatum( (long)10, (long) (10 * 1.0 / (Math.pow(10.0, Q/10.0))), (byte)Q);
RecalDatum COV = new RecalDatum( (long)15, (long) 1, (byte)45.0); // no data here so Bayesian prior has a huge effect on the empirical quality
// initial epsilon condition dominates when there is no data
Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( 45.0, RG, QS, Collections.singletonList(COV)), 45.0, 1E-4 );
Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( 25.0, RG, QS, Collections.singletonList(COV)), 25.0, 1E-4 );
Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( 5.0, RG, QS, Collections.singletonList(COV)), 5.0, 1E-4 );
Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( 65.0, RG, QS, Collections.singletonList(COV)), 65.0, 1E-4 );
Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( epsilon, RG, QS, Collections.singletonList(COV)), epsilon, 1E-4 );
}
}