From a29fc6311a6632cf946e5ee00522b59f9915e5fe Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 21 Mar 2012 15:48:55 -0400 Subject: [PATCH 1/9] New debug option to output the assembly graph in dot format. Merge nodes in assembly graph when possible. --- .../sting/gatk/walkers/indels/PairHMMIndelErrorModel.java | 2 +- public/java/src/org/broadinstitute/sting/utils/MathUtils.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 64993b43a..890ed9e3d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -500,7 +500,7 @@ public class PairHMMIndelErrorModel { if (stop > ref.getWindow().getStop()) stop = ref.getWindow().getStop(); - // if there's an insertion in the read, the read stop position will be less than start + read legnth, + // if there's an insertion in the read, the read stop position will be less than start + read length, // but we want to compute likelihoods in the whole region that a read might overlap if (stop <= start + readLength) { stop = start + readLength-1; diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index bfc326d2d..780eb2101 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -205,7 +205,7 @@ public class MathUtils { /** * Calculates the log10 cumulative sum of an array with log10 probabilities * - * @param log10p the array with log10 probabilites + * @param log10p the array with log10 probabilities * @param upTo index in the array to calculate the cumsum up to * @return the log10 of the cumulative sum */ From 
019145175b7ba5ae5d44623388ec7d70bdb453a5 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 26 Mar 2012 11:32:44 -0400 Subject: [PATCH 9/9] Major optimizations to graph construction through better use of built-in graph.containsVertex and vertex.equals methods. Minor optimizations to MathUtils.approximateLog10SumLog10 method. --- .../broadinstitute/sting/utils/Haplotype.java | 5 ++ .../broadinstitute/sting/utils/MathUtils.java | 51 +++++++++---------- 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java index 051ba757d..143fdf4bf 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java @@ -72,6 +72,11 @@ public class Haplotype { public boolean equals( Object h ) { return h instanceof Haplotype && Arrays.equals(bases, ((Haplotype) h).bases); } + + @Override + public int hashCode() { + return Arrays.hashCode(bases); + } public void addReadLikelihoods( final String sample, final double[] readLikelihoods ) { if( readLikelihoodsPerSample == null ) { diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 780eb2101..7c882ac6d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -50,7 +50,8 @@ public class MathUtils { public static final double[] log10Cache; private static final double[] jacobianLogTable; private static final double JACOBIAN_LOG_TABLE_STEP = 0.001; - private static final double MAX_JACOBIAN_TOLERANCE = 10.0; + private static final double JACOBIAN_LOG_TABLE_INV_STEP = 1.0 / 0.001; + private static final double MAX_JACOBIAN_TOLERANCE = 8.0; private static final int JACOBIAN_LOG_TABLE_SIZE = (int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1; private 
static final int MAXN = 11000; private static final int LOG10_CACHE_SIZE = 4 * MAXN; // we need to be able to go up to 2*(2N) when calculating some of the coefficients @@ -73,7 +74,7 @@ public class MathUtils { // under/overflow checking, so this shouldn't be used in the general case (but is fine // if one is already make those checks before calling in to the rounding). public static int fastRound(double d) { - return (d > 0) ? (int) (d + 0.5d) : (int) (d - 0.5d); + return (d > 0.0) ? (int) (d + 0.5d) : (int) (d - 0.5d); } public static double approximateLog10SumLog10(final double[] vals) { @@ -84,8 +85,6 @@ public class MathUtils { final int maxElementIndex = MathUtils.maxElementIndex(vals, endIndex); double approxSum = vals[maxElementIndex]; - if (approxSum == Double.NEGATIVE_INFINITY) - return approxSum; for (int i = 0; i < endIndex; i++) { if (i == maxElementIndex || vals[i] == Double.NEGATIVE_INFINITY) @@ -94,7 +93,7 @@ public class MathUtils { final double diff = approxSum - vals[i]; if (diff < MathUtils.MAX_JACOBIAN_TOLERANCE) { // See notes from the 2-inout implementation below - final int ind = fastRound(diff / MathUtils.JACOBIAN_LOG_TABLE_STEP); // hard rounding + final int ind = fastRound(diff * MathUtils.JACOBIAN_LOG_TABLE_INV_STEP); // hard rounding approxSum += MathUtils.jacobianLogTable[ind]; } } @@ -123,7 +122,7 @@ public class MathUtils { // max(x,y) + log10(1+10^-abs(x-y)) // we compute the second term as a table lookup with integer quantization // we have pre-stored correction for 0,0.1,0.2,... 
10.0 - final int ind = fastRound(diff / MathUtils.JACOBIAN_LOG_TABLE_STEP); // hard rounding + final int ind = fastRound(diff * MathUtils.JACOBIAN_LOG_TABLE_INV_STEP); // hard rounding return big + MathUtils.jacobianLogTable[ind]; } @@ -591,12 +590,12 @@ public class MathUtils { } public static int maxElementIndex(final double[] array, final int endIndex) { - if (array == null) + if (array == null || array.length == 0) throw new IllegalArgumentException("Array cannot be null!"); - int maxI = -1; - for (int i = 0; i < endIndex; i++) { - if (maxI == -1 || array[i] > array[maxI]) + int maxI = 0; + for (int i = 1; i < endIndex; i++) { + if (array[i] > array[maxI]) maxI = i; } @@ -608,12 +607,12 @@ public class MathUtils { } public static int maxElementIndex(final int[] array, int endIndex) { - if (array == null) + if (array == null || array.length == 0) throw new IllegalArgumentException("Array cannot be null!"); - int maxI = -1; - for (int i = 0; i < endIndex; i++) { - if (maxI == -1 || array[i] > array[maxI]) + int maxI = 0; + for (int i = 1; i < endIndex; i++) { + if (array[i] > array[maxI]) maxI = i; } @@ -637,12 +636,12 @@ public class MathUtils { } public static int minElementIndex(double[] array) { - if (array == null) + if (array == null || array.length == 0) throw new IllegalArgumentException("Array cannot be null!"); - int minI = -1; - for (int i = 0; i < array.length; i++) { - if (minI == -1 || array[i] < array[minI]) + int minI = 0; + for (int i = 1; i < array.length; i++) { + if (array[i] < array[minI]) minI = i; } @@ -650,12 +649,12 @@ public class MathUtils { } public static int minElementIndex(byte[] array) { - if (array == null) + if (array == null || array.length == 0) throw new IllegalArgumentException("Array cannot be null!"); - int minI = -1; - for (int i = 0; i < array.length; i++) { - if (minI == -1 || array[i] < array[minI]) + int minI = 0; + for (int i = 1; i < array.length; i++) { + if (array[i] < array[minI]) minI = i; } @@ -663,12 +662,12 @@ 
public class MathUtils { } public static int minElementIndex(int[] array) { - if (array == null) + if (array == null || array.length == 0) throw new IllegalArgumentException("Array cannot be null!"); - int minI = -1; - for (int i = 0; i < array.length; i++) { - if (minI == -1 || array[i] < array[minI]) + int minI = 0; + for (int i = 1; i < array.length; i++) { + if (array[i] < array[minI]) minI = i; }