diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java index 200a250f2..26023bd2f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java @@ -454,8 +454,7 @@ public class HaplotypeIndelErrorModel { // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+x0^x2)-log10(2) // First term is approximated by Jacobian log with table lookup. // Second term is a constant added to both likelihoods so will be ignored - haplotypeLikehoodMatrix[i][j] += MathUtils.softMax(readLikelihood[0], - readLikelihood[1]); + haplotypeLikehoodMatrix[i][j] += MathUtils.approximateLog10SumLog10(readLikelihood[0], readLikelihood[1]); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 6410d619d..64993b43a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -166,18 +166,17 @@ public class PairHMMIndelErrorModel { final double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual]; - matchMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[im1][jm1] + pBaseRead, XMetricArray[im1][jm1] + pBaseRead, - YMetricArray[im1][jm1] + pBaseRead); + matchMetricArray[indI][indJ] = pBaseRead + MathUtils.approximateLog10SumLog10(new double[]{matchMetricArray[im1][jm1], XMetricArray[im1][jm1], YMetricArray[im1][jm1]}); final double c1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1]; final double d1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1]; - XMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1); + XMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1); // update Y array final double c2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1]; final double d2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1]; - YMetricArray[indI][indJ] = MathUtils.softMax(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2); + YMetricArray[indI][indJ] = MathUtils.approximateLog10SumLog10(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2); } } @@ -316,9 +315,7 @@ public class PairHMMIndelErrorModel { final int bestI = X_METRIC_LENGTH - 1, bestJ = Y_METRIC_LENGTH - 1; - final double bestMetric = MathUtils.softMax(matchMetricArray[bestI][bestJ], - XMetricArray[bestI][bestJ], - YMetricArray[bestI][bestJ]); + final double bestMetric = MathUtils.approximateLog10SumLog10(new double[]{ matchMetricArray[bestI][bestJ], XMetricArray[bestI][bestJ], YMetricArray[bestI][bestJ] }); /* if (DEBUG) { @@ -651,7 +648,7 @@ public class PairHMMIndelErrorModel { private final static double[] getHaplotypeLikelihoods(final int numHaplotypes, final int readCounts[], final double readLikelihoods[][]) { final double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes]; - // todo: MAD 09/26/11 -- I'm almost certain this calculation can be simplied to just a single loop without the intermediate NxN matrix + // todo: MAD 09/26/11 -- I'm almost certain this calculation can be simplified to just a single loop without the intermediate NxN matrix for (int i=0; i < numHaplotypes; i++) { for (int j=i; j < numHaplotypes; j++){ // combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j] @@ -665,7 +662,7 @@ public class PairHMMIndelErrorModel { final double li = readLikelihoods[readIdx][i]; final double lj = readLikelihoods[readIdx][j]; final int readCount = readCounts[readIdx]; - haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.softMax(li, lj) + LOG_ONE_HALF); + haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.approximateLog10SumLog10(li, lj) + LOG_ONE_HALF); } } } @@ -678,7 +675,7 @@ public class PairHMMIndelErrorModel { } } - // renormalize so that max element is zero. + // renormalize so that max element is zero. return MathUtils.normalizeFromLog10(genotypeLikelihoods, false, true); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java index 82776ca2e..6f0ae7c25 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java @@ -140,15 +140,16 @@ public class GaussianMixtureModel { } for( final VariantDatum datum : data ) { - final ArrayList pVarInGaussianLog10 = new ArrayList( gaussians.size() ); + final double[] pVarInGaussianLog10 = new double[gaussians.size()]; + int gaussianIndex = 0; for( final MultivariateGaussian gaussian : gaussians ) { final double pVarLog10 = gaussian.evaluateDatumLog10( datum ); - pVarInGaussianLog10.add( pVarLog10 ); + pVarInGaussianLog10[gaussianIndex++] = pVarLog10; } - final double[] pVarInGaussianNormalized = MathUtils.normalizeFromLog10( pVarInGaussianLog10 ); - int iii = 0; + final double[] pVarInGaussianNormalized = MathUtils.normalizeFromLog10( pVarInGaussianLog10, false ); + gaussianIndex = 0; for( final MultivariateGaussian gaussian : gaussians ) { - gaussian.assignPVarInGaussian( pVarInGaussianNormalized[iii++] ); //BUGBUG: to clean up + gaussian.assignPVarInGaussian( pVarInGaussianNormalized[gaussianIndex++] ); } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 6f2db67ee..a96cbffc5 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -41,14 +41,6 @@ import java.util.*; * @author Kiran Garimella */ public class MathUtils { - /** Public constants - used for the Lanczos approximation to the factorial function - * (for the calculation of the binomial/multinomial probability in logspace) - * @param LANC_SEQ[] - an array holding the constants which correspond to the product - * of Chebyshev Polynomial coefficients, and points on the Gamma function (for interpolation) - * [see A Precision Approximation of the Gamma Function J. SIAM Numer. Anal. Ser. B, Vol. 1 1964. pp. 86-96] - * @param LANC_G - a value for the Lanczos approximation to the gamma function that works to - * high precision - */ /** * Private constructor. No instantiating this class! @@ -56,6 +48,28 @@ public class MathUtils { private MathUtils() { } + public static final double[] log10Cache; + private static final double[] jacobianLogTable; + private static final double JACOBIAN_LOG_TABLE_STEP = 0.001; + private static final double MAX_JACOBIAN_TOLERANCE = 10.0; + private static final int JACOBIAN_LOG_TABLE_SIZE = (int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1; + private static final int MAXN = 11000; + private static final int LOG10_CACHE_SIZE = 4 * MAXN; // we need to be able to go up to 2*(2N) when calculating some of the coefficients + + static { + log10Cache = new double[LOG10_CACHE_SIZE]; + jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE]; + + log10Cache[0] = Double.NEGATIVE_INFINITY; + for (int k = 1; k < LOG10_CACHE_SIZE; k++) + log10Cache[k] = Math.log10(k); + + for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) { + jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP)); + + } + } + // A fast implementation of the Math.round() method. This method does not perform // under/overflow checking, so this shouldn't be used in the general case (but is fine // if one is already make those checks before calling in to the rounding). @@ -536,7 +550,7 @@ public class MathUtils { // all negative) the largest value; also, we need to convert to normal-space. double maxValue = Utils.findMaxEntry(array); - // we may decide to just normalize in log space with converting to linear space + // we may decide to just normalize in log space without converting to linear space if (keepInLogSpace) { for (int i = 0; i < array.length; i++) array[i] -= maxValue; @@ -563,29 +577,6 @@ public class MathUtils { return normalized; } - public static double[] normalizeFromLog10(List array, boolean takeLog10OfOutput) { - double[] normalized = new double[array.size()]; - - // for precision purposes, we need to add (or really subtract, since they're - // all negative) the largest value; also, we need to convert to normal-space. - double maxValue = MathUtils.arrayMaxDouble(array); - for (int i = 0; i < array.size(); i++) - normalized[i] = Math.pow(10, array.get(i) - maxValue); - - // normalize - double sum = 0.0; - for (int i = 0; i < array.size(); i++) - sum += normalized[i]; - for (int i = 0; i < array.size(); i++) { - double x = normalized[i] / sum; - if (takeLog10OfOutput) - x = Math.log10(x); - normalized[i] = x; - } - - return normalized; - } - /** * normalizes the log10-based array. ASSUMES THAT ALL ARRAY ENTRIES ARE <= 0 (<= 1 IN REAL-SPACE). * @@ -596,10 +587,6 @@ public class MathUtils { return normalizeFromLog10(array, false); } - public static double[] normalizeFromLog10(List array) { - return normalizeFromLog10(array, false); - } - public static int maxElementIndex(final double[] array) { return maxElementIndex(array, array.length); } @@ -1207,78 +1194,11 @@ public class MathUtils { return ((double) num) / (Math.max(denom, 1)); } - public static final double[] log10Cache; - public static final double[] jacobianLogTable; - public static final int JACOBIAN_LOG_TABLE_SIZE = 101; - public static final double JACOBIAN_LOG_TABLE_STEP = 0.1; - public static final double INV_JACOBIAN_LOG_TABLE_STEP = 1.0 / JACOBIAN_LOG_TABLE_STEP; - public static final double MAX_JACOBIAN_TOLERANCE = 10.0; - private static final int MAXN = 11000; - private static final int LOG10_CACHE_SIZE = 4 * MAXN; // we need to be able to go up to 2*(2N) when calculating some of the coefficients - - static { - log10Cache = new double[LOG10_CACHE_SIZE]; - jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE]; - - log10Cache[0] = Double.NEGATIVE_INFINITY; - for (int k = 1; k < LOG10_CACHE_SIZE; k++) - log10Cache[k] = Math.log10(k); - - for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) { - jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP)); - - } - } - - static public double softMax(final double[] vec) { - double acc = vec[0]; - for (int k = 1; k < vec.length; k++) - acc = softMax(acc, vec[k]); - - return acc; - - } - static public double max(double x0, double x1, double x2) { double a = Math.max(x0, x1); return Math.max(a, x2); } - static public double softMax(final double x0, final double x1, final double x2) { - // compute naively log10(10^x[0] + 10^x[1]+...) - // return Math.log10(MathUtils.sumLog10(vec)); - - // better approximation: do Jacobian logarithm function on data pairs - double a = softMax(x0, x1); - return softMax(a, x2); - } - - static public double softMax(final double x, final double y) { - // we need to compute log10(10^x + 10^y) - // By Jacobian logarithm identity, this is equal to - // max(x,y) + log10(1+10^-abs(x-y)) - // we compute the second term as a table lookup - // with integer quantization - - // slow exact version: - // return Math.log10(Math.pow(10.0,x) + Math.pow(10.0,y)); - - double diff = x - y; - - if (diff > MAX_JACOBIAN_TOLERANCE) - return x; - else if (diff < -MAX_JACOBIAN_TOLERANCE) - return y; - else if (diff >= 0) { - int ind = (int) (diff * INV_JACOBIAN_LOG_TABLE_STEP + 0.5); - return x + jacobianLogTable[ind]; - } - else { - int ind = (int) (-diff * INV_JACOBIAN_LOG_TABLE_STEP + 0.5); - return y + jacobianLogTable[ind]; - } - } - public static double phredScaleToProbability(byte q) { return Math.pow(10, (-q) / 10.0); } @@ -1734,6 +1654,4 @@ public class MathUtils { return bitSetFrom(baseTen+preContext); // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length. } - - } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 823eeeeb9..cfb0d11a1 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("202b337ebbea3def1be8495eb363dfa8")); + Arrays.asList("8f81a14fffc1a59b4b066f8595dc1232")); executeTest("test MultiSample Pilot1", spec); } @@ -52,7 +52,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("ae29b9c9aacce8046dc780430540cd62")); + Arrays.asList("c5b53231f4f6d9524bc4ec8115f44f5c")); executeTest("test SingleSample Pilot2", spec); } @@ -60,7 +60,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1, - Arrays.asList("10027d13befaa07b7900a7af0ae0791c")); + Arrays.asList("5af005255240a2186f04cb50851b8b6f")); executeTest("test Multiple SNP alleles", spec); } @@ -70,7 +70,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "fda341de80b3f6fd42a83352b18b1d65"; + private final static String COMPRESSED_OUTPUT_MD5 = "a08df9aea2b3df09cf90ff8e6e3be3ea"; @Test public void testCompressedOutput() { @@ -91,7 +91,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // Note that we need to turn off any randomization for this to work, so no downsampling and no annotations - String md5 = "32a34362dff51d8b73a3335048516d82"; + String md5 = "6358934c1c26345013a38261b8c45aa4"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, @@ -179,8 +179,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testHeterozyosity() { HashMap e = new HashMap(); - e.put( 0.01, "2cb2544739e01f6c08fd820112914317" ); - e.put( 1.0 / 1850, "730b2b83a4b1f6d46fc3b5cd7d90756c" ); + e.put( 0.01, "926b58038dd4989bf7eda697a847eea9" ); + e.put( 1.0 / 1850, "93f44105b43b65730a3b821e27b0fa16" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -204,7 +204,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("f0fbe472f155baf594b1eeb58166edef")); + Arrays.asList("a1b75a7e12b160b0be823228c958573f")); executeTest(String.format("test multiple technologies"), spec); } @@ -223,7 +223,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("8c87c749a7bb5a76ed8504d4ec254272")); + Arrays.asList("3bda1279cd6dcb47885f3e19466f11b9")); executeTest(String.format("test calling with BAQ"), spec); } @@ -242,7 +242,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("bd9d3d50a1f49605d7cd592a0f446899")); + Arrays.asList("d9fc3ba94a0d46029778c7b457e7292a")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -257,7 +257,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("2ad52c2e75b3ffbfd8f03237c444e8e6")); + Arrays.asList("b2e30ae3e5ffa6108f9f6178b1d2e679")); executeTest(String.format("test indel caller in SLX with low min allele count"), spec); } @@ -270,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("91cd6d2e3972b0b8e4064bb35a33241f")); + Arrays.asList("2cd182a84613fa91a6020466d2d327e2")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -280,7 +280,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("c60a44ba94a80a0cb1fba8b6f90a13cd")); + Arrays.asList("9cd08dc412a007933381e9c76c073899")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1); } @@ -290,7 +290,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("320f61c87253aba77d6dc782242cba8b")); + Arrays.asList("5ef1f007d3ef77c1b8f31e5e036eff53")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2); } @@ -300,7 +300,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1, - Arrays.asList("c9897b80615c53a4ea10a4b193d56d9c")); + Arrays.asList("2609675a356f2dfc86f8a1d911210978")); executeTest("test MultiSample Pilot2 indels with complicated records", spec3); } @@ -309,7 +309,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec( baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation + "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1, - Arrays.asList("5282fdb1711a532d726c13507bf80a21")); + Arrays.asList("4fdd8da77167881b71b3547da5c13f94")); executeTest("test MultiSample Phase1 indels with complicated records", spec4); } diff --git a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java index 75fc44873..1ba6c74d4 100755 --- a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java @@ -205,6 +205,24 @@ public class MathUtilsUnitTest extends BaseTest { } } + /** + * Private functions used by testArrayShuffle() + */ + private boolean hasUniqueElements(Object[] x) { + for (int i = 0; i < x.length; i++) + for (int j = i + 1; j < x.length; j++) + if (x[i].equals(x[j]) || x[i] == x[j]) + return false; + return true; + } + + private boolean hasAllElements(final Object[] expected, final Object[] actual) { + HashSet set = new HashSet(); + set.addAll(Arrays.asList(expected)); + set.removeAll(Arrays.asList(actual)); + return set.isEmpty(); + } + @Test(enabled = true) public void testIntAndBitSetConversion() { Assert.assertEquals(MathUtils.intFrom(MathUtils.bitSetFrom(428)), 428); @@ -234,26 +252,71 @@ public class MathUtilsUnitTest extends BaseTest { Assert.assertEquals(MathUtils.dnaFrom(MathUtils.bitSetFrom("AACGTCAATGCAGTCAAGTCAGACGTGGGTT")), "AACGTCAATGCAGTCAAGTCAGACGTGGGTT"); // testing max precision (length == 31) } + @Test + public void testApproximateLog10SumLog10() { + Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, 0.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, 0.0), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(0.0, -1.0), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-2.2, -3.5), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-1.0, -7.1), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(5.0, 6.2), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(38.1, 16.2), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-38.1, 6.2), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-19.1, -37.1), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-29.1, -27.6), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-0.12345, -0.23456), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(-15.7654, -17.0101), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3); - private boolean hasUniqueElements(Object[] x) { - for (int i = 0; i < x.length; i++) - for (int j = i + 1; j < x.length; j++) - if (x[i].equals(x[j]) || x[i] == x[j]) - return false; + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, -1.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-2.2, -3.5}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, -7.1}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{5.0, 6.2}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{38.1, 16.2}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-38.1, 6.2}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-19.1, -37.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-29.1, -27.6}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-0.12345, -0.23456}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-15.7654, -17.0101}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101)), 1e-3); + + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, 0.0, 0.0}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, 0.0) + Math.pow(10.0, 0.0)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{0.0, -1.0, -2.5}), Math.log10(Math.pow(10.0, 0.0) + Math.pow(10.0, -1.0) + Math.pow(10.0, -2.5)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-2.2, -3.5, -1.1}), Math.log10(Math.pow(10.0, -2.2) + Math.pow(10.0, -3.5) + Math.pow(10.0, -1.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-1.0, -7.1, 0.5}), Math.log10(Math.pow(10.0, -1.0) + Math.pow(10.0, -7.1) + Math.pow(10.0, 0.5)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{5.0, 6.2, 1.3}), Math.log10(Math.pow(10.0, 5.0) + Math.pow(10.0, 6.2) + Math.pow(10.0, 1.3)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{38.1, 16.2, 18.1}), Math.log10(Math.pow(10.0, 38.1) + Math.pow(10.0, 16.2) + Math.pow(10.0, 18.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-38.1, 6.2, 26.6}), Math.log10(Math.pow(10.0, -38.1) + Math.pow(10.0, 6.2) + Math.pow(10.0, 26.6)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-19.1, -37.1, -45.1}), Math.log10(Math.pow(10.0, -19.1) + Math.pow(10.0, -37.1) + Math.pow(10.0, -45.1)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-29.1, -27.6, -26.2}), Math.log10(Math.pow(10.0, -29.1) + Math.pow(10.0, -27.6) + Math.pow(10.0, -26.2)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-0.12345, -0.23456, -0.34567}), Math.log10(Math.pow(10.0, -0.12345) + Math.pow(10.0, -0.23456) + Math.pow(10.0, -0.34567)), 1e-3); + Assert.assertEquals(MathUtils.approximateLog10SumLog10(new double[]{-15.7654, -17.0101, -17.9341}), Math.log10(Math.pow(10.0, -15.7654) + Math.pow(10.0, -17.0101) + Math.pow(10.0, -17.9341)), 1e-3); + } + + @Test + public void testNormalizeFromLog10() { + Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{0.0, 0.0, -1.0, -1.1, -7.8}, false, true), new double[]{0.0, 0.0, -1.0, -1.1, -7.8})); + Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -1.0, -1.0, -1.1, -7.8}, false, true), new double[]{0.0, 0.0, 0.0, -0.1, -6.8})); + Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-10.0, -7.8, -10.5, -1.1, -10.0}, false, true), new double[]{-8.9, -6.7, -9.4, 0.0, -8.9})); + + Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -1.0, -1.0, -1.0}), new double[]{0.25, 0.25, 0.25, 0.25})); + Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -3.0, -1.0, -1.0}), new double[]{0.1 * 1.0 / 0.301, 0.001 * 1.0 / 0.301, 0.1 * 1.0 / 0.301, 0.1 * 1.0 / 0.301})); + Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[]{-1.0, -3.0, -1.0, -2.0}), new double[]{0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211})); + } + + /** + * Private function used by testNormalizeFromLog10() + */ + private boolean compareDoubleArrays(double[] b1, double[] b2) { + if( b1.length != b2.length ) { + return false; // sanity check + } + + for( int i=0; i < b1.length; i++ ){ + if ( MathUtils.compareDoubles(b1[i], b2[i]) != 0 ) + return false; + } return true; } - - private boolean hasAllElements(final Object[] expected, final Object[] actual) { - HashSet set = new HashSet(); - set.addAll(Arrays.asList(expected)); - set.removeAll(Arrays.asList(actual)); - return set.isEmpty(); - } - - private void p (Object []x) { - for (Object v: x) - System.out.print((Integer) v + " "); - System.out.println(); - } - }