diff --git a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java index 491e4e25e..673b1524d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java @@ -419,6 +419,44 @@ public class BaseUtils { return new String(simpleComplement(bases.getBytes())); } + /** + * Returns the index of the most common base in the basecounts array. To be used with + * pileup.getBaseCounts. + * + * @param baseCounts counts of a,c,g,t in order. + * @return the index of the most common base + */ + static public int mostFrequentBaseIndex(int[] baseCounts) { + int mostFrequentBaseIndex = 0; + for (int baseIndex = 1; baseIndex < 4; baseIndex++) { + if (baseCounts[baseIndex] > baseCounts[mostFrequentBaseIndex]) { + mostFrequentBaseIndex = baseIndex; + } + } + return mostFrequentBaseIndex; + } + + static public int mostFrequentBaseIndexNotRef(int[] baseCounts, int refBaseIndex) { + int tmp = baseCounts[refBaseIndex]; + baseCounts[refBaseIndex] = -1; + int result = mostFrequentBaseIndex(baseCounts); + baseCounts[refBaseIndex] = tmp; + return result; + } + + static public int mostFrequentBaseIndexNotRef(int[] baseCounts, byte refSimpleBase) { + return mostFrequentBaseIndexNotRef(baseCounts, simpleBaseToBaseIndex(refSimpleBase)); + } + + /** + * Returns the most common base in the basecounts array. To be used with pileup.getBaseCounts. + * + * @param baseCounts counts of a,c,g,t in order. + * @return the most common base + */ + static public byte mostFrequentSimpleBase(int[] baseCounts) { + return baseIndexToSimpleBase(mostFrequentBaseIndex(baseCounts)); + } /** * For the most frequent base in the sequence, return the percentage of the read it constitutes. @@ -437,12 +475,7 @@ public class BaseUtils { } } - int mostFrequentBaseIndex = 0; - for (int baseIndex = 1; baseIndex < 4; baseIndex++) { - if (baseCounts[baseIndex] > baseCounts[mostFrequentBaseIndex]) { - mostFrequentBaseIndex = baseIndex; - } - } + int mostFrequentBaseIndex = mostFrequentBaseIndex(baseCounts); return ((double) baseCounts[mostFrequentBaseIndex])/((double) sequence.length); } diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index cbe2948aa..e197bb973 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils; import com.google.java.contract.Requires; import net.sf.samtools.SAMRecord; +import org.apache.lucene.messages.NLS; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -124,6 +125,16 @@ public class MathUtils { } + /** + * Calculates the log10 cumulative sum of an array with log10 probabilities + * @param log10p the array with log10 probabilites + * @param upTo index in the array to calculate the cumsum up to + * @return the log10 of the cumulative sum + */ + public static double log10CumulativeSumLog10(double [] log10p, int upTo) { + return log10sumLog10(log10p, 0, upTo); + } + /** * Converts a real space array of probabilities into a log10 array * @param prRealSpace @@ -137,10 +148,14 @@ public class MathUtils { } public static double log10sumLog10(double[] log10p, int start) { + return log10sumLog10(log10p, start, log10p.length); + } + + public static double log10sumLog10(double[] log10p, int start, int finish) { double sum = 0.0; double maxValue = Utils.findMaxEntry(log10p); - for ( int i = start; i < log10p.length; i++ ) { + for ( int i = start; i < finish; i++ ) { sum += Math.pow(10.0, log10p[i] - maxValue); } @@ -345,6 +360,23 @@ public class MathUtils { return Math.pow(10,log10MultinomialProbability(n, k, log10P)); } + /** + * calculate the Root Mean Square of an array of integers + * @param x an byte[] of numbers + * @return the RMS of the specified numbers. + */ + public static double rms(byte[] x) { + if ( x.length == 0 ) + return 0.0; + + double rms = 0.0; + for (int i : x) + rms += i * i; + rms /= x.length; + return Math.sqrt(rms); + } + + /** * calculate the Root Mean Square of an array of integers * @param x an int[] of numbers @@ -471,6 +503,18 @@ public class MathUtils { return maxI; } + public static int maxElementIndex(int[] array) { + if ( array == null ) throw new IllegalArgumentException("Array cannot be null!"); + + int maxI = -1; + for ( int i = 0; i < array.length; i++ ) { + if ( maxI == -1 || array[i] > array[maxI] ) + maxI = i; + } + + return maxI; + } + public static double arrayMax(double[] array) { return array[maxElementIndex(array)]; } @@ -727,6 +771,38 @@ public class MathUtils { return count; } + public static int countOccurrences(byte element, byte [] array) { + int count = 0; + for (byte y : array) { + if (element == y) + count++; + } + + return count; + } + + /** + * Returns the top (larger) N elements of the array. Naive n^2 implementation (Selection Sort). + * Better than sorting if N (number of elements to return) is small + * + * @param array the array + * @param n number of top elements to return + * @return the n larger elements of the array + */ + public static Collection getNMaxElements(double [] array, int n) { + ArrayList maxN = new ArrayList(n); + double lastMax = Double.MAX_VALUE; + for (int i=0; i