package org.broadinstitute.sting.utils;

import net.sf.samtools.*;
import net.sf.samtools.util.StringUtil;
import org.apache.log4j.Logger;

import java.io.File;
import java.util.*;

/**
 * Grab-bag of static helpers: user-facing warnings/errors, small collection and
 * string utilities, SAM header/record helpers, and simple numeric routines.
 *
 * User: depristo
 * Date: Feb 24, 2009
 * Time: 10:12:31 AM
 */
public class Utils {
    /** our log, which we want to capture anything from this class */
    private static final Logger logger = Logger.getLogger(Utils.class);

    /** Horizontal rule printed above and below a warning block. */
    private static final String WARNING_BANNER =
            "********************************************************************************";

    /** Maximum number of message characters printed on one line of a warning block. */
    private static final int WARNING_WRAP_WIDTH = 70;

    /**
     * Logs a message at WARN level inside a banner so that it stands out in the log.
     * Long messages are wrapped (see {@link #prettyPrintWarningMessage(String)}).
     *
     * @param msg the warning text
     */
    public static void warnUser(final String msg) {
        logger.warn(WARNING_BANNER);
        logger.warn("* WARNING:");
        logger.warn("*");
        prettyPrintWarningMessage(msg);
        logger.warn(WARNING_BANNER);
    }

    /**
     * Logs the message at FATAL level and then aborts by throwing.
     *
     * @param msg the error text
     *
     * @throws StingException always, carrying {@code msg}
     */
    public static void scareUser(final String msg) {
        logger.fatal(msg);
        throw new StingException(msg);
    }

    /**
     * Compares two objects, either of which might be null.
     *
     * @param lhs One object to compare.
     * @param rhs The other object to compare.
     *
     * @return True if the two objects are equal (or both null), false otherwise.
     */
    public static boolean equals(Object lhs, Object rhs) {
        if (lhs == null) {
            return rhs == null;
        }
        return lhs.equals(rhs);
    }

    /**
     * Builds a new list consisting of {@code elt} followed by the elements of {@code l}.
     * The argument list is not modified.
     *
     * @param elt element to put at the head of the new list
     * @param l   tail elements; may be null, in which case the result holds only {@code elt}
     * @param <T> element type
     *
     * @return a freshly allocated list
     */
    public static <T> List<T> cons(final T elt, final List<T> l) {
        List<T> l2 = new ArrayList<T>();
        l2.add(elt);
        if (l != null) {
            l2.addAll(l);
        }
        return l2;
    }

    /**
     * pretty print the warning message supplied, wrapping it so that no printed line carries
     * more than {@link #WARNING_WRAP_WIDTH} characters of the message. Lines are broken at the
     * last space that fits; a run with no usable space is hard-wrapped.
     *
     * @param message the message
     */
    private static void prettyPrintWarningMessage(String message) {
        StringBuilder builder = new StringBuilder(message);
        while (builder.length() > WARNING_WRAP_WIDTH) {
            int space = builder.lastIndexOf(" ", WARNING_WRAP_WIDTH);
            if (space > 0) {
                // break at the space and swallow it
                logger.warn(String.format("* %s", builder.substring(0, space)));
                builder.delete(0, space + 1);
            } else {
                // No usable break point: hard-wrap. Only the characters actually printed are
                // removed; deleting one extra (as for a swallowed space) would lose a character.
                logger.warn(String.format("* %s", builder.substring(0, WARNING_WRAP_WIDTH)));
                builder.delete(0, WARNING_WRAP_WIDTH);
            }
        }
        logger.warn(String.format("* %s", builder));
    }

    /**
     * Creates a new header carrying the same sort order, group order, program records, read
     * groups, sequence dictionary and attributes as {@code toCopy}. The component objects
     * themselves are passed to the new header as returned by the getters (they are not cloned here).
     *
     * @param toCopy header to copy from
     *
     * @return the new header
     */
    public static SAMFileHeader copySAMFileHeader(SAMFileHeader toCopy) {
        SAMFileHeader copy = new SAMFileHeader();

        copy.setSortOrder(toCopy.getSortOrder());
        copy.setGroupOrder(toCopy.getGroupOrder());
        copy.setProgramRecords(toCopy.getProgramRecords());
        copy.setReadGroups(toCopy.getReadGroups());
        copy.setSequenceDictionary(toCopy.getSequenceDictionary());

        for (Map.Entry<String, Object> e : toCopy.getAttributes()) {
            copy.setAttribute(e.getKey(), e.getValue());
        }

        return copy;
    }

    /**
     * Creates a SAM/BAM writer for the given file name. The compression level is applied only
     * when the name ends with ".bam"; for any other name the factory chooses the format from
     * the file name and {@code compression} is ignored.
     *
     * @param header      header to write
     * @param presorted   passed through to the writer factory
     * @param file        output file name
     * @param compression BAM compression level (used for ".bam" outputs only)
     *
     * @return the writer
     */
    public static SAMFileWriter createSAMFileWriterWithCompression(SAMFileHeader header, boolean presorted,
                                                                   String file, int compression) {
        final SAMFileWriterFactory factory = new SAMFileWriterFactory();
        final File output = new File(file);
        if (file.endsWith(".bam")) {
            return factory.makeBAMWriter(header, presorted, output, compression);
        }
        return factory.makeSAMOrBAMWriter(header, presorted, output);
    }

    /**
     * Returns a new list built from those objects found in collection <c> that satisfy the
     * predicate ( i.e. pred.apply() is true for the objects in the resulting list ).
     *
     * @param pred filtering condition ( objects, for which pred.apply() is true pass the filter )
     * @param c    collection to filter (will not be modified)
     * @param <T>  element type
     *
     * @return new list built from elements of <c> passing the filter
     * @see #filterInPlace(Predicate pred, Collection c)
     */
    public static <T> List<T> filter(Predicate<? super T> pred, Collection<T> c) {
        List<T> filtered = new ArrayList<T>();
        for (T obj : c) {
            if (pred.apply(obj)) {
                filtered.add(obj);
            }
        }
        return filtered;
    }

    /**
     * Removes from the collection all the elements that do not pass the filter (i.e. those elements,
     * for which pred.apply() is false ). This is an in-place method - the argument is modified, and no new
     * objects are created/copied. Collection's iterator (as returned by iterator()) must implement
     * optional remove() interface method that allows multiple subsequent removals of elements from the
     * underlying collection (this is the standard contract). This method
     * works best for collections that support cheap, constant time
     * object removal (such as LinkedList, HashSet etc.). It is also specifically designed to
     * detect ArrayLists and use optimized strategy for them. However
     * with other, custom lists that 1) do not inherit (are not instanceof) from ArrayList and 2) do not implement
     * fast (constant time) remove() operation, the performance can degrade significantly (linear traversal times,
     * e.g., linear removal ~ N^2).
     *
     * @param pred filtering condition (only elements, for which pred.apply() is true will be kept in the collection)
     * @param c    collection to filter (will be modified - should be mutable and should implement remove() )
     * @param <T>  element type
     *
     * @return reference to the same (modified) collection
     * @see #filter(Predicate pred, Collection c)
     */
    public static <T> Collection<T> filterInPlace(Predicate<? super T> pred, Collection<T> c) {
        if (c instanceof ArrayList) {
            // ArrayLists are a special case that we know how to process efficiently
            // (the generic implementation below removes one element at a time, which is
            // linear per removal for an ArrayList).
            List<T> list = (List<T>) c;
            int j = 0; // copy-to location
            // One linear pass copying forward all elements that pass the filter,
            // so that the head of the list is a contiguous sequence of such elements.
            for (int i = 0; i < list.size(); i++) {
                T elt = list.get(i);
                if (pred.apply(elt)) {
                    list.set(j++, elt);
                }
            }
            // j now points to the first unused copy-to location; elements 0...j-1 pass the filter
            list.subList(j, list.size()).clear(); // remove tail of the list
            // Done: return here so the generic pass below does not re-evaluate the predicate
            // on every surviving element.
            return c;
        }

        // Generic path. Iterator.remove() is used because removing through the collection
        // itself while iterating over it is not permitted.
        Iterator<T> it = c.iterator();
        while (it.hasNext()) {
            if (!pred.apply(it.next())) {
                it.remove();
            }
        }
        return c;
    }

    /**
     * Converts the whole char array to a list of bytes (one byte per char, via
     * {@code StringUtil.charsToBytes}).
     *
     * @param fullArray characters to convert
     *
     * @return list of the converted bytes
     */
    public static ArrayList<Byte> subseq(char[] fullArray) {
        byte[] fullByteArray = new byte[fullArray.length];
        StringUtil.charsToBytes(fullArray, 0, fullArray.length, fullByteArray, 0);
        return subseq(fullByteArray);
    }

    /**
     * Copies the whole byte array into a list.
     *
     * @param fullArray bytes to copy
     *
     * @return list holding every element of {@code fullArray}
     */
    public static ArrayList<Byte> subseq(byte[] fullArray) {
        return subseq(fullArray, 0, fullArray.length - 1);
    }

    /**
     * Copies the elements {@code start..end} (both INCLUSIVE) of the array into a list.
     *
     * @param fullArray source bytes
     * @param start     index of the first element to copy
     * @param end       index of the last element to copy; must be less than {@code fullArray.length}
     *
     * @return list of the selected bytes
     */
    public static ArrayList<Byte> subseq(byte[] fullArray, int start, int end) {
        assert end < fullArray.length;
        ArrayList<Byte> dest = new ArrayList<Byte>(end - start + 1);
        for (int i = start; i <= end; i++) {
            dest.add(fullArray[i]);
        }
        return dest;
    }

    /**
     * Turns a list of bytes into a String, decoding with the platform default charset.
     *
     * @param bases bytes to decode; must not contain null elements
     *
     * @return the decoded string
     */
    public static String baseList2string(List<Byte> bases) {
        byte[] basesAsbytes = new byte[bases.size()];
        int i = 0;
        for (Byte b : bases) {
            basesAsbytes[i++] = b;
        }
        return new String(basesAsbytes);
    }

    /**
     * Tells whether the read's read group declares a platform ("PL" attribute) containing "454",
     * compared case-insensitively.
     *
     * @param read the read to inspect
     *
     * @return true if so; false if the read has no read group or no "PL" attribute
     */
    public static boolean is454Read(SAMRecord read) {
        SAMReadGroupRecord readGroup = read.getReadGroup();
        if (readGroup == null) {
            return false;
        }
        Object readPlatformAttr = readGroup.getAttribute("PL");
        return readPlatformAttr != null && readPlatformAttr.toString().toUpperCase().contains("454");
    }

    /**
     * Display names for individual SAM flag bits. Insertion-ordered so that
     * {@link #readFlagsAsString(SAMRecord)} lists names in ascending bit order.
     */
    private static final Map<Integer, String> readFlagNames = new LinkedHashMap<Integer, String>();

    static {
        readFlagNames.put(0x1, "Paired");
        readFlagNames.put(0x2, "Proper");
        readFlagNames.put(0x4, "Unmapped");
        readFlagNames.put(0x8, "MateUnmapped");
        // NOTE(review): the SAM specification defines 0x10 as "reverse strand"; the label is kept
        // as it was - confirm whether "Forward" is intended.
        readFlagNames.put(0x10, "Forward");
        // 0x20 (mate strand) is intentionally not reported.
        // First/second-of-pair are bits 0x40/0x80; using 0x4/0x8 would overwrite the
        // Unmapped/MateUnmapped entries above.
        readFlagNames.put(0x40, "FirstOfPair");
        readFlagNames.put(0x80, "SecondOfPair");
        readFlagNames.put(0x100, "NotPrimary");
        readFlagNames.put(0x200, "NON-PF");
        readFlagNames.put(0x400, "Duplicate");
    }

    /**
     * Renders the flag bits set on a read as display names.
     *
     * @param rec the read
     *
     * @return the names of all known set flags, each followed by a single space
     *         (empty string if none is set)
     */
    public static String readFlagsAsString(SAMRecord rec) {
        final int recFlags = rec.getFlags();
        StringBuilder flags = new StringBuilder();
        for (Map.Entry<Integer, String> e : readFlagNames.entrySet()) {
            if ((recFlags & e.getKey()) != 0) {
                flags.append(e.getValue()).append(' ');
            }
        }
        return flags.toString();
    }

    /**
     * join the key value pairs of a map into one string, i.e. myMap = [A->1,B->2,C->3] with a call of:
     * joinMap("-","*",myMap) -> returns A-1*B-2*C-3
     *
     * Be forewarned, if you're not using a map that is aware of the ordering (i.e. HashMap instead of LinkedHashMap)
     * the ordering of the string you get back might not be what you expect! (i.e. C-3*A-1*B-2 vrs A-1*B-2*C-3)
     *
     * Null keys or values are rendered as "null".
     *
     * @param keyValueSeperator the string to seperate the key-value pairs
     * @param recordSeperator   the string to use to seperate each key-value pair from other key-value pairs
     * @param map               the map to draw from
     * @param <L>               the map's key type
     * @param <R>               the map's value type
     *
     * @return a string representing the joined map, or null (not an empty string) if the map is empty
     */
    public static <L, R> String joinMap(String keyValueSeperator, String recordSeperator, Map<L, R> map) {
        if (map.size() < 1) {
            return null;
        }
        String[] joinedKeyValues = new String[map.size()];
        int index = 0;
        for (Map.Entry<L, R> e : map.entrySet()) {
            joinedKeyValues[index++] = String.valueOf(e.getKey()) + keyValueSeperator + String.valueOf(e.getValue());
        }
        return join(recordSeperator, joinedKeyValues);
    }

    /**
     * join an array of strings given a seperator
     *
     * @param separator the string to insert between each array element
     * @param strings   the array of strings
     *
     * @return a string, which is the joining of all array values with the separator
     */
    public static String join(String separator, String[] strings) {
        return join(separator, strings, 0, strings.length);
    }

    /**
     * Joins the elements {@code strings[start] .. strings[end-1]} with the separator.
     *
     * @param separator the string to insert between consecutive elements
     * @param strings   the array of strings
     * @param start     index of the first element to join (inclusive)
     * @param end       index one past the last element to join (exclusive)
     *
     * @return the joined string; "" when {@code start == end}
     */
    public static String join(String separator, String[] strings, int start, int end) {
        if ((end - start) == 0) {
            return "";
        }
        StringBuilder ret = new StringBuilder(strings[start]);
        for (int i = start + 1; i < end; ++i) {
            ret.append(separator);
            ret.append(strings[i]);
        }
        return ret.toString();
    }

    /**
     * Joins the {@code toString()} values of the collection's elements with the separator,
     * in iteration order.
     *
     * @param separator the string to insert between consecutive elements
     * @param objects   elements to join; must not contain null
     * @param <T>       element type
     *
     * @return the joined string; "" for an empty collection
     */
    public static <T> String join(String separator, Collection<T> objects) {
        ArrayList<String> strs = new ArrayList<String>(objects.size());
        for (Object x : objects) {
            strs.add(x.toString());
        }
        return join(separator, strs.toArray(new String[0]));
    }

    /**
     * Averages the leading elements of the list, stopping once the element index exceeds
     * {@code maxI} (so at most {@code maxI + 1} elements are used).
     *
     * @param vals values to average
     * @param maxI largest element index to include
     *
     * @return the average; NaN if no element was included
     */
    public static double average(List<Long> vals, int maxI) {
        long sum = 0L;
        int i = 0;
        for (long x : vals) {
            if (i > maxI) {
                break;
            }
            sum += x;
            i++;
        }
        return (1.0 * sum) / i;
    }

    /**
     * Averages the leading elements of the list, stopping once the element index exceeds
     * {@code maxI} (so at most {@code maxI + 1} elements are used).
     *
     * @param vals values to average
     * @param maxI largest element index to include
     *
     * @return the average; NaN if no element was included
     */
    public static double averageDouble(List<Double> vals, int maxI) {
        double sum = 0.0;
        int i = 0;
        for (double x : vals) {
            if (i > maxI) {
                break;
            }
            sum += x;
            i++;
        }
        return (1.0 * sum) / i;
    }

    /**
     * @param vals values to average
     *
     * @return the average of all elements; NaN for an empty list
     */
    public static double average(List<Long> vals) {
        return average(vals, vals.size());
    }

    /**
     * @param vals values to average
     *
     * @return the average of all elements; NaN for an empty list
     */
    public static double averageDouble(List<Double> vals) {
        return averageDouble(vals, vals.size());
    }

    /**
     * Computes the permutation of indices that sorts {@code A} in ascending order, i.e.
     * {@code A[result[0]] <= A[result[1]] <= ...}. The array itself is not modified.
     * (Java Generics can't do primitive types, hence one overload per primitive type.)
     *
     * @param A values to rank
     *
     * @return indices into {@code A} in ascending order of value
     */
    public static Integer[] SortPermutation(final int[] A) {
        class comparator implements Comparator<Integer> {
            public int compare(Integer a, Integer b) {
                final int x = A[a.intValue()];
                final int y = A[b.intValue()];
                if (x < y) {
                    return -1;
                }
                return x == y ? 0 : 1;
            }
        }

        Integer[] permutation = new Integer[A.length];
        for (int i = 0; i < A.length; i++) {
            permutation[i] = i;
        }
        Arrays.sort(permutation, new comparator());
        return permutation;
    }

    /**
     * Computes the permutation of indices that sorts {@code A} in ascending order.
     * The array itself is not modified. Values that are neither less than nor greater than
     * each other (including any comparison involving NaN) are treated as equal.
     *
     * @param A values to rank
     *
     * @return indices into {@code A} in ascending order of value
     */
    public static Integer[] SortPermutation(final double[] A) {
        class comparator implements Comparator<Integer> {
            public int compare(Integer a, Integer b) {
                final double x = A[a.intValue()];
                final double y = A[b.intValue()];
                if (x < y) {
                    return -1;
                }
                if (x > y) {
                    return 1;
                }
                return 0;
            }
        }

        Integer[] permutation = new Integer[A.length];
        for (int i = 0; i < A.length; i++) {
            permutation[i] = i;
        }
        Arrays.sort(permutation, new comparator());
        return permutation;
    }

    /**
     * Computes the permutation of indices that sorts the list in ascending natural order.
     * The list itself is not modified (a snapshot of its elements is compared).
     *
     * @param A   values to rank
     * @param <T> element type
     *
     * @return indices into {@code A} in ascending order of value
     */
    @SuppressWarnings("unchecked") // elements are T by construction; Comparable is used raw, as declared
    public static <T extends Comparable> Integer[] SortPermutation(List<T> A) {
        final Object[] data = A.toArray();

        class comparator implements Comparator<Integer> {
            public int compare(Integer a, Integer b) {
                return ((T) data[a]).compareTo(data[b]);
            }
        }

        Integer[] permutation = new Integer[A.size()];
        for (int i = 0; i < A.size(); i++) {
            permutation[i] = i;
        }
        Arrays.sort(permutation, new comparator());
        return permutation;
    }

    /**
     * Returns a new array with {@code output[i] = array[permutation[i]]}.
     *
     * @param array       source values (not modified)
     * @param permutation source index for each output position; needs at least {@code array.length} entries
     *
     * @return the permuted copy, of the same length as {@code array}
     */
    public static int[] PermuteArray(int[] array, Integer[] permutation) {
        int[] output = new int[array.length];
        for (int i = 0; i < output.length; i++) {
            output[i] = array[permutation[i]];
        }
        return output;
    }

    /**
     * Returns a new array with {@code output[i] = array[permutation[i]]}.
     *
     * @param array       source values (not modified)
     * @param permutation source index for each output position; needs at least {@code array.length} entries
     *
     * @return the permuted copy, of the same length as {@code array}
     */
    public static double[] PermuteArray(double[] array, Integer[] permutation) {
        double[] output = new double[array.length];
        for (int i = 0; i < output.length; i++) {
            output[i] = array[permutation[i]];
        }
        return output;
    }

    /**
     * Returns a new array with {@code output[i] = array[permutation[i]]}.
     *
     * @param array       source values (not modified)
     * @param permutation source index for each output position; needs at least {@code array.length} entries
     *
     * @return the permuted copy, of the same length as {@code array}
     */
    public static Object[] PermuteArray(Object[] array, Integer[] permutation) {
        Object[] output = new Object[array.length];
        for (int i = 0; i < output.length; i++) {
            output[i] = array[permutation[i]];
        }
        return output;
    }

    /**
     * Returns a new array with {@code output[i] = array[permutation[i]]}.
     *
     * @param array       source values (not modified)
     * @param permutation source index for each output position; needs at least {@code array.length} entries
     *
     * @return the permuted copy, of the same length as {@code array}
     */
    public static String[] PermuteArray(String[] array, Integer[] permutation) {
        String[] output = new String[array.length];
        for (int i = 0; i < output.length; i++) {
            output[i] = array[permutation[i]];
        }
        return output;
    }

    /**
     * Returns a new list whose i-th element is {@code list.get(permutation[i])}. The result
     * has {@code permutation.length} elements.
     *
     * @param list        source values (not modified)
     * @param permutation source index for each output position
     * @param <T>         element type
     *
     * @return the permuted list
     */
    public static <T> List<T> PermuteList(List<T> list, Integer[] permutation) {
        List<T> output = new ArrayList<T>(permutation.length);
        for (int i = 0; i < permutation.length; i++) {
            output.add(list.get(permutation[i]));
        }
        return output;
    }

    /**
     * Draw N random elements from list.
     *
     * @param list elements to draw from (not modified)
     * @param N    number of elements to draw
     * @param <T>  element type
     *
     * @return a new list of N elements in random order; if the list has N or fewer elements,
     *         the input list itself is returned (not a copy, and not shuffled)
     */
    public static <T> List<T> RandomSubset(List<T> list, int N) {
        if (list.size() <= N) {
            return list;
        }

        // Assign each position a random key and take the N positions with the smallest keys.
        java.util.Random random = new java.util.Random();
        int[] idx = new int[list.size()];
        for (int i = 0; i < list.size(); i++) {
            idx[i] = random.nextInt();
        }

        Integer[] perm = SortPermutation(idx);

        List<T> ans = new ArrayList<T>(N);
        for (int i = 0; i < N; i++) {
            ans.add(list.get(perm[i]));
        }
        return ans;
    }

    /**
     * Approximates the natural log of the gamma function with a fixed six-term series.
     * Lifted from http://www.cs.princeton.edu/introcs/91float/Gamma.java.html
     *
     * @param x argument
     *
     * @return approximation of ln(Gamma(x))
     */
    public static double logGamma(double x) {
        double tmp = (x - 0.5) * Math.log(x + 4.5) - (x + 4.5);
        double ser = 1.0 + 76.18009173 / (x + 0) - 86.50532033 / (x + 1)
                + 24.01409822 / (x + 2) - 1.231739516 / (x + 3)
                + 0.00120858003 / (x + 4) - 0.00000536382 / (x + 5);
        return tmp + Math.log(ser * Math.sqrt(2 * Math.PI));
    }

    /**
     * @param x    part
     * @param base whole
     *
     * @return {@code x} as a percentage of {@code base}, or 0 when {@code base} is not positive
     */
    public static double percentage(double x, double base) {
        return (base > 0 ? (x / base) * 100.0 : 0);
    }

    /**
     * @param x    part
     * @param base whole
     *
     * @return {@code x} as a percentage of {@code base}, or 0 when {@code base} is not positive
     */
    public static double percentage(int x, int base) {
        return (base > 0 ? ((double) x / (double) base) * 100.0 : 0);
    }

    /**
     * @param x    part
     * @param base whole
     *
     * @return {@code x} as a percentage of {@code base}, or 0 when {@code base} is not positive
     */
    public static double percentage(long x, long base) {
        return (base > 0 ? ((double) x / (double) base) * 100.0 : 0);
    }

    /**
     * @param c       character to repeat
     * @param nCopies number of repetitions; must not be negative
     *
     * @return a string of {@code nCopies} copies of {@code c}
     */
    public static String dupString(char c, int nCopies) {
        char[] chars = new char[nCopies];
        Arrays.fill(chars, c);
        return new String(chars);
    }

    /**
     * @param c character to look for
     * @param s string to scan
     *
     * @return number of positions in {@code s} holding {@code c}
     */
    public static int countOccurrences(char c, String s) {
        int count = 0;
        for (int i = 0; i < s.length(); i++) {
            if (s.charAt(i) == c) {
                count++;
            }
        }
        return count;
    }

    /**
     * @param x   value to look for; may be null, in which case null elements are counted
     * @param l   list to scan
     * @param <T> element type
     *
     * @return number of elements of {@code l} equal to {@code x}
     */
    public static <T> int countOccurrences(T x, List<T> l) {
        int count = 0;
        for (T y : l) {
            if (Utils.equals(x, y)) {
                count++;
            }
        }
        return count;
    }

    /**
     * @param quals values to scan; must not contain null elements
     *
     * @return the largest value in the list, or 0 for an empty list
     */
    public static byte listMaxByte(List<Byte> quals) {
        if (quals.size() == 0) {
            return 0;
        }
        byte m = quals.get(0);
        for (byte b : quals) {
            if (b > m) {
                m = b;
            }
        }
        return m;
    }

    /**
     * @param array values to scan
     *
     * @return the maximum value in the array, or 0.0 for an empty array
     */
    public static double findMaxEntry(double[] array) {
        return findIndexAndMaxEntry(array).first;
    }

    /**
     * @param array values to scan
     *
     * @return the index of the (first) maximum value in the array, or -1 for an empty array
     */
    public static int findIndexOfMaxEntry(double[] array) {
        return findIndexAndMaxEntry(array).second;
    }

    /**
     * @param array values to scan
     *
     * @return the maximum value and the index of its first occurrence; (0.0, -1) for an empty array
     */
    private static Pair<Double, Integer> findIndexAndMaxEntry(double[] array) {
        if (array.length == 0) {
            return new Pair<Double, Integer>(0.0, -1);
        }
        int index = 0;
        double max = array[0];
        for (int i = 1; i < array.length; i++) {
            if (array[i] > max) {
                max = array[i];
                index = i;
            }
        }
        return new Pair<Double, Integer>(max, index);
    }

    /**
     * Returns indices of all occurrences of the specified symbol in the string
     *
     * @param s  string to scan
     * @param ch symbol to look for
     *
     * @return positions of {@code ch} in {@code s}, in ascending order (empty array if none)
     */
    public static int[] indexOfAll(String s, int ch) {
        int[] pos = new int[64];
        int z = 0;
        for (int i = 0; i < s.length(); i++) {
            if (s.charAt(i) == ch) {
                if (z == pos.length) {
                    // buffer full: grow it rather than overrun
                    pos = reallocate(pos, 2 * pos.length);
                }
                pos[z++] = i;
            }
        }
        return reallocate(pos, z);
    }

    /**
     * Returns new (reallocated) integer array of the specified size, with content
     * of the original array <code>orig</code> copied into it. If <code>newSize</code> is
     * less than the size of the original array, only first <code>newSize</code> elements will be copied.
     * If new size is greater than the size of the original array, the content of the original array will be padded
     * with zeros up to the new size. Finally, if new size is the same as original size, no memory reallocation
     * will be performed and the original array will be returned instead.
     *
     * @param orig    array to copy from
     * @param newSize length of the returned array
     *
     * @return array of length {@code newSize} ({@code orig} itself when the length already matches)
     */
    public static int[] reallocate(int[] orig, int newSize) {
        if (orig.length == newSize) {
            return orig;
        }
        int[] new_array = new int[newSize];
        System.arraycopy(orig, 0, new_array, 0, Math.min(orig.length, newSize));
        return new_array;
    }

    /* TEST ME
    public static void main(String[] argv) {
        List<Integer> l1 = new LinkedList<Integer>();
        List<Integer> l2 = new ArrayList<Integer>();

        l1.add(1); l1.add(5); l1.add(3); l1.add(10); l1.add(4); l1.add(2);
        l2.add(1); l2.add(5); l2.add(3); l2.add(10); l2.add(4); l2.add(2);

        Predicate<Integer> p = new Predicate<Integer>() {
            public boolean apply(Integer i) {
                return i > 2;
            }
        };
        filterInPlace(p, l1);
        filterInPlace(p, l2);

        for ( int i = 0 ; i < l1.size(); i++ ) System.out.print(" "+l1.get(i));
        System.out.println();
        for ( int i = 0 ; i < l2.size(); i++ ) System.out.print(" " + l2.get(i));
        System.out.println();
    }
    */

    /**
     * a helper method. Turns a single character string into a char.
     *
     * @param str the string
     *
     * @return a char
     *
     * @throws IllegalArgumentException if the string is not exactly one character long
     */
    public static char stringToChar(String str) {
        if (str.length() != 1) {
            throw new IllegalArgumentException("String length must be one");
        }
        return str.charAt(0);
    }
}