diff --git a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java index 491e4e25e..de8c81031 100644 --- a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java @@ -419,6 +419,32 @@ public class BaseUtils { return new String(simpleComplement(bases.getBytes())); } + /** + * Returns the index of the most common base in the basecounts array. To be used with + * pileup.getBaseCounts. + * + * @param baseCounts counts of a,c,g,t in order. + * @return the index of the most common base + */ + static public int mostFrequentBaseIndex(int[] baseCounts) { + int mostFrequentBaseIndex = 0; + for (int baseIndex = 1; baseIndex < 4; baseIndex++) { + if (baseCounts[baseIndex] > baseCounts[mostFrequentBaseIndex]) { + mostFrequentBaseIndex = baseIndex; + } + } + return mostFrequentBaseIndex; + } + + /** + * Returns the most common base in the basecounts array. To be used with pileup.getBaseCounts. + * + * @param baseCounts counts of a,c,g,t in order. + * @return the most common base + */ + static public byte mostFrequentSimpleBase(int[] baseCounts) { + return baseIndexToSimpleBase(mostFrequentBaseIndex(baseCounts)); + } /** * For the most frequent base in the sequence, return the percentage of the read it constitutes. @@ -437,12 +463,7 @@ public class BaseUtils { } } - int mostFrequentBaseIndex = 0; - for (int baseIndex = 1; baseIndex < 4; baseIndex++) { - if (baseCounts[baseIndex] > baseCounts[mostFrequentBaseIndex]) { - mostFrequentBaseIndex = baseIndex; - } - } + int mostFrequentBaseIndex = mostFrequentBaseIndex(baseCounts); return ((double) baseCounts[mostFrequentBaseIndex])/((double) sequence.length); } diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index f823e6d42..9bd3f603c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils; import com.google.java.contract.Requires; import net.sf.samtools.SAMRecord; +import org.apache.lucene.messages.NLS; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -124,6 +125,16 @@ public class MathUtils { } + /** + * Calculates the log10 cumulative sum of an array with log10 probabilities + * @param log10p the array with log10 probabilites + * @param upTo index in the array to calculate the cumsum up to + * @return the log10 of the cumulative sum + */ + public static double log10CumulativeSumLog10(double [] log10p, int upTo) { + return log10sumLog10(log10p, 0, upTo); + } + /** * Converts a real space array of probabilities into a log10 array * @param prRealSpace @@ -137,10 +148,14 @@ public class MathUtils { } public static double log10sumLog10(double[] log10p, int start) { + return log10sumLog10(log10p, start, log10p.length); + } + + public static double log10sumLog10(double[] log10p, int start, int finish) { double sum = 0.0; double maxValue = Utils.findMaxEntry(log10p); - for ( int i = start; i < log10p.length; i++ ) { + for ( int i = start; i < finish; i++ ) { sum += Math.pow(10.0, log10p[i] - maxValue); } @@ -471,6 +486,18 @@ public class MathUtils { return maxI; } + public static int maxElementIndex(int[] array) { + if ( array == null ) throw new IllegalArgumentException("Array cannot be null!"); + + int maxI = -1; + for ( int i = 0; i < array.length; i++ ) { + if ( maxI == -1 || array[i] > array[maxI] ) + maxI = i; + } + + return maxI; + } + public static double arrayMax(double[] array) { return array[maxElementIndex(array)]; } @@ -727,7 +754,7 @@ public class MathUtils { return count; } - public static int countOccurrences(byte element, byte [] array) { + public static int countOccurrences(byte element, byte [] array) { int count = 0; for (byte y : array) { if (element == y) @@ -737,6 +764,27 @@ public class MathUtils { return count; } + /** + * Returns the top (larger) N elements of the array. Naive n^2 implementation (Selection Sort). + * Better than sorting if N (number of elements to return) is small + * + * @param array the array + * @param n number of top elements to return + * @return the n larger elements of the array + */ + public static Collection getNMaxElements(double [] array, int n) { + ArrayList maxN = new ArrayList(n); + double lastMax = Double.MAX_VALUE; + for (int i=0; i