2010-04-20 07:00:08 +08:00
|
|
|
/*
|
2013-01-11 06:04:08 +08:00
|
|
|
* Copyright (c) 2012 The Broad Institute
|
|
|
|
|
*
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person
|
|
|
|
|
* obtaining a copy of this software and associated documentation
|
|
|
|
|
* files (the "Software"), to deal in the Software without
|
|
|
|
|
* restriction, including without limitation the rights to use,
|
|
|
|
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
* copies of the Software, and to permit persons to whom the
|
|
|
|
|
* Software is furnished to do so, subject to the following
|
|
|
|
|
* conditions:
|
|
|
|
|
*
|
|
|
|
|
* The above copyright notice and this permission notice shall be
|
|
|
|
|
* included in all copies or substantial portions of the Software.
|
|
|
|
|
*
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
|
|
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
|
|
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
|
|
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
|
|
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
|
*/
|
2010-04-20 07:00:08 +08:00
|
|
|
|
2009-03-12 04:58:01 +08:00
|
|
|
package org.broadinstitute.sting.utils;
|
2009-02-27 05:50:29 +08:00
|
|
|
|
2012-03-29 00:55:29 +08:00
|
|
|
import com.google.java.contract.Requires;
|
2012-03-17 02:09:07 +08:00
|
|
|
import net.sf.samtools.SAMFileHeader;
|
|
|
|
|
import net.sf.samtools.SAMProgramRecord;
|
2009-05-12 06:45:11 +08:00
|
|
|
import net.sf.samtools.util.StringUtil;
|
2009-10-23 14:31:15 +08:00
|
|
|
import org.apache.log4j.Logger;
|
2012-03-17 02:09:07 +08:00
|
|
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|
|
|
|
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
|
|
|
|
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
2009-02-27 05:50:29 +08:00
|
|
|
|
2011-08-29 00:04:16 +08:00
|
|
|
import java.net.InetAddress;
|
2011-07-18 08:29:58 +08:00
|
|
|
import java.util.*;
|
|
|
|
|
|
2009-02-27 05:50:29 +08:00
|
|
|
/**
|
|
|
|
|
* Created by IntelliJ IDEA.
|
|
|
|
|
* User: depristo
|
|
|
|
|
* Date: Feb 24, 2009
|
|
|
|
|
* Time: 10:12:31 AM
|
|
|
|
|
* To change this template use File | Settings | File Templates.
|
|
|
|
|
*/
|
|
|
|
|
public class Utils {
|
2009-10-23 14:31:15 +08:00
|
|
|
/** our log, which we want to capture anything from this class */
|
2009-06-05 23:02:17 +08:00
|
|
|
private static Logger logger = Logger.getLogger(Utils.class);
|
2009-03-27 22:02:55 +08:00
|
|
|
|
2011-08-03 09:59:06 +08:00
|
|
|
public static final float JAVA_DEFAULT_HASH_LOAD_FACTOR = 0.75f;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Calculates the optimum initial size for a hash table given the maximum number
|
|
|
|
|
* of elements it will need to hold. The optimum size is the smallest size that
|
|
|
|
|
* is guaranteed not to result in any rehash/table-resize operations.
|
|
|
|
|
*
|
|
|
|
|
* @param maxElements The maximum number of elements you expect the hash table
|
|
|
|
|
* will need to hold
|
|
|
|
|
* @return The optimum initial size for the table, given maxElements
|
|
|
|
|
*/
|
|
|
|
|
public static int optimumHashSize ( int maxElements ) {
|
|
|
|
|
return (int)(maxElements / JAVA_DEFAULT_HASH_LOAD_FACTOR) + 2;
|
|
|
|
|
}
|
|
|
|
|
|
2009-08-23 08:56:02 +08:00
|
|
|
/**
|
|
|
|
|
* Compares two objects, either of which might be null.
|
2009-10-23 14:31:15 +08:00
|
|
|
*
|
2009-08-23 08:56:02 +08:00
|
|
|
* @param lhs One object to compare.
|
|
|
|
|
* @param rhs The other object to compare.
|
2009-10-23 14:31:15 +08:00
|
|
|
*
|
2009-08-23 08:56:02 +08:00
|
|
|
* @return True if the two objects are equal, false otherwise.
|
|
|
|
|
*/
|
|
|
|
|
public static boolean equals(Object lhs, Object rhs) {
|
2009-10-23 14:31:15 +08:00
|
|
|
if (lhs == null && rhs == null) return true;
|
|
|
|
|
else if (lhs == null) return false;
|
2009-08-23 08:56:02 +08:00
|
|
|
else return lhs.equals(rhs);
|
|
|
|
|
}
|
|
|
|
|
|
2009-06-06 07:34:37 +08:00
|
|
|
public static <T> List<T> cons(final T elt, final List<T> l) {
|
|
|
|
|
List<T> l2 = new ArrayList<T>();
|
|
|
|
|
l2.add(elt);
|
2009-10-23 14:31:15 +08:00
|
|
|
if (l != null) l2.addAll(l);
|
2009-06-06 07:34:37 +08:00
|
|
|
return l2;
|
|
|
|
|
}
|
|
|
|
|
|
2010-09-10 07:21:17 +08:00
|
|
|
public static void warnUser(final String msg) {
|
2011-10-27 11:05:41 +08:00
|
|
|
warnUser(logger, msg);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static void warnUser(final Logger logger, final String msg) {
|
2010-09-10 07:21:17 +08:00
|
|
|
logger.warn(String.format("********************************************************************************"));
|
|
|
|
|
logger.warn(String.format("* WARNING:"));
|
|
|
|
|
logger.warn(String.format("*"));
|
2011-10-27 11:05:41 +08:00
|
|
|
prettyPrintWarningMessage(logger, msg);
|
2010-09-10 07:21:17 +08:00
|
|
|
logger.warn(String.format("********************************************************************************"));
|
|
|
|
|
}
|
|
|
|
|
|
2009-06-05 23:49:03 +08:00
|
|
|
/**
|
|
|
|
|
* pretty print the warning message supplied
|
2009-10-23 14:31:15 +08:00
|
|
|
*
|
2011-10-27 11:05:41 +08:00
|
|
|
* @param logger logger for the message
|
2009-06-05 23:49:03 +08:00
|
|
|
* @param message the message
|
|
|
|
|
*/
|
2011-10-27 11:05:41 +08:00
|
|
|
private static void prettyPrintWarningMessage(Logger logger, String message) {
|
2009-06-05 23:49:03 +08:00
|
|
|
StringBuilder builder = new StringBuilder(message);
|
|
|
|
|
while (builder.length() > 70) {
|
|
|
|
|
int space = builder.lastIndexOf(" ", 70);
|
|
|
|
|
if (space <= 0) space = 70;
|
2009-10-23 14:31:15 +08:00
|
|
|
logger.warn(String.format("* %s", builder.substring(0, space)));
|
|
|
|
|
builder.delete(0, space + 1);
|
2009-06-05 23:49:03 +08:00
|
|
|
}
|
|
|
|
|
logger.warn(String.format("* %s", builder));
|
|
|
|
|
}
|
|
|
|
|
|
2009-05-12 06:45:11 +08:00
|
|
|
public static ArrayList<Byte> subseq(char[] fullArray) {
|
|
|
|
|
byte[] fullByteArray = new byte[fullArray.length];
|
2009-10-23 14:31:15 +08:00
|
|
|
StringUtil.charsToBytes(fullArray, 0, fullArray.length, fullByteArray, 0);
|
2009-05-12 06:45:11 +08:00
|
|
|
return subseq(fullByteArray);
|
|
|
|
|
}
|
|
|
|
|
|
2009-03-16 22:46:19 +08:00
|
|
|
public static ArrayList<Byte> subseq(byte[] fullArray) {
|
2009-10-23 14:31:15 +08:00
|
|
|
return subseq(fullArray, 0, fullArray.length - 1);
|
2009-03-16 22:46:19 +08:00
|
|
|
}
|
2009-03-27 22:02:55 +08:00
|
|
|
|
2009-03-16 22:46:19 +08:00
|
|
|
public static ArrayList<Byte> subseq(byte[] fullArray, int start, int end) {
|
2009-04-06 11:51:35 +08:00
|
|
|
assert end < fullArray.length;
|
2009-03-27 22:02:55 +08:00
|
|
|
ArrayList<Byte> dest = new ArrayList<Byte>(end - start + 1);
|
2009-05-12 06:45:11 +08:00
|
|
|
for (int i = start; i <= end; i++) {
|
2009-03-16 22:46:19 +08:00
|
|
|
dest.add(fullArray[i]);
|
|
|
|
|
}
|
|
|
|
|
return dest;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static String baseList2string(List<Byte> bases) {
|
|
|
|
|
byte[] basesAsbytes = new byte[bases.size()];
|
|
|
|
|
int i = 0;
|
2009-03-27 22:02:55 +08:00
|
|
|
for (Byte b : bases) {
|
2009-03-16 22:46:19 +08:00
|
|
|
basesAsbytes[i] = b;
|
|
|
|
|
i++;
|
|
|
|
|
}
|
|
|
|
|
return new String(basesAsbytes);
|
|
|
|
|
}
|
|
|
|
|
|
2009-11-18 00:50:01 +08:00
|
|
|
/**
|
|
|
|
|
* join the key value pairs of a map into one string, i.e. myMap = [A->1,B->2,C->3] with a call of:
|
|
|
|
|
* joinMap("-","*",myMap) -> returns A-1*B-2*C-3
|
|
|
|
|
*
|
|
|
|
|
* Be forewarned, if you're not using a map that is aware of the ordering (i.e. HashMap instead of LinkedHashMap)
|
|
|
|
|
* the ordering of the string you get back might not be what you expect! (i.e. C-3*A-1*B-2 vrs A-1*B-2*C-3)
|
|
|
|
|
*
|
|
|
|
|
* @param keyValueSeperator the string to seperate the key-value pairs
|
|
|
|
|
* @param recordSeperator the string to use to seperate each key-value pair from other key-value pairs
|
|
|
|
|
* @param map the map to draw from
|
|
|
|
|
* @param <L> the map's key type
|
|
|
|
|
* @param <R> the map's value type
|
|
|
|
|
* @return a string representing the joined map
|
|
|
|
|
*/
|
|
|
|
|
public static <L,R> String joinMap(String keyValueSeperator, String recordSeperator, Map<L,R> map) {
|
|
|
|
|
if (map.size() < 1) { return null; }
|
|
|
|
|
String joinedKeyValues[] = new String[map.size()];
|
|
|
|
|
int index = 0;
|
|
|
|
|
for (L key : map.keySet()) {
|
|
|
|
|
joinedKeyValues[index++] = String.format("%s%s%s",key.toString(),keyValueSeperator,map.get(key).toString());
|
|
|
|
|
}
|
|
|
|
|
return join(recordSeperator,joinedKeyValues);
|
|
|
|
|
}
|
|
|
|
|
|
2010-05-19 11:37:26 +08:00
|
|
|
/**
|
|
|
|
|
* Splits a String using indexOf instead of regex to speed things up.
|
|
|
|
|
*
|
|
|
|
|
* @param str the string to split.
|
|
|
|
|
* @param delimiter the delimiter used to split the string.
|
|
|
|
|
* @return an array of tokens.
|
|
|
|
|
*/
|
|
|
|
|
public static ArrayList<String> split(String str, String delimiter) {
|
|
|
|
|
return split(str, delimiter, 10);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Splits a String using indexOf instead of regex to speed things up.
|
|
|
|
|
*
|
|
|
|
|
* @param str the string to split.
|
|
|
|
|
* @param delimiter the delimiter used to split the string.
|
|
|
|
|
* @param expectedNumTokens The number of tokens expected. This is used to initialize the ArrayList.
|
|
|
|
|
* @return an array of tokens.
|
|
|
|
|
*/
|
|
|
|
|
public static ArrayList<String> split(String str, String delimiter, int expectedNumTokens) {
|
|
|
|
|
final ArrayList<String> result = new ArrayList<String>(expectedNumTokens);
|
|
|
|
|
|
|
|
|
|
int delimiterIdx = -1;
|
|
|
|
|
do {
|
|
|
|
|
final int tokenStartIdx = delimiterIdx + 1;
|
|
|
|
|
delimiterIdx = str.indexOf(delimiter, tokenStartIdx);
|
|
|
|
|
final String token = (delimiterIdx != -1 ? str.substring(tokenStartIdx, delimiterIdx) : str.substring(tokenStartIdx) );
|
|
|
|
|
result.add(token);
|
|
|
|
|
} while( delimiterIdx != -1 );
|
|
|
|
|
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2009-11-18 00:50:01 +08:00
|
|
|
/**
|
|
|
|
|
* join an array of strings given a seperator
|
|
|
|
|
* @param separator the string to insert between each array element
|
|
|
|
|
* @param strings the array of strings
|
|
|
|
|
* @return a string, which is the joining of all array values with the separator
|
|
|
|
|
*/
|
2009-02-28 01:07:57 +08:00
|
|
|
public static String join(String separator, String[] strings) {
|
2009-04-02 06:54:38 +08:00
|
|
|
return join(separator, strings, 0, strings.length);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static String join(String separator, String[] strings, int start, int end) {
|
|
|
|
|
if ((end - start) == 0) {
|
2009-02-28 01:07:57 +08:00
|
|
|
return "";
|
|
|
|
|
}
|
2009-04-02 06:54:38 +08:00
|
|
|
StringBuilder ret = new StringBuilder(strings[start]);
|
2009-10-23 14:31:15 +08:00
|
|
|
for (int i = start + 1; i < end; ++i) {
|
2009-02-28 01:07:57 +08:00
|
|
|
ret.append(separator);
|
|
|
|
|
ret.append(strings[i]);
|
|
|
|
|
}
|
|
|
|
|
return ret.toString();
|
|
|
|
|
}
|
2009-03-03 02:18:48 +08:00
|
|
|
|
2012-06-02 07:25:11 +08:00
|
|
|
public static String join(String separator, int[] ints) {
|
|
|
|
|
if ( ints == null || ints.length == 0)
|
|
|
|
|
return "";
|
|
|
|
|
else {
|
Algorithmically faster version of DiffEngine
-- Now only includes leaf nodes in the summary, i.e., summaries of the form "*.*....*.X", which are really the most valuable to see. This calculation can be accomplished in linear time for N differences, rather than the previous O(n^2) algorithm
-- Now computes the max number of elements to read correctly. Counts now the size of the entire element tree, not just the count of the roots, which was painful because the trees vary by orders of magnitude in size.
-- Because of this we can enforce a meaningful, useful value for the max elements in MD5 or 100K, and this works well.
-- Added integration test for new leaf and old pairwise calculations
-- Bugfix for Utils.join(sep, int[]) that was eating the first element of the AD, PL fields
2012-06-11 08:13:18 +08:00
|
|
|
StringBuilder ret = new StringBuilder();
|
|
|
|
|
ret.append(ints[0]);
|
2012-06-02 07:25:11 +08:00
|
|
|
for (int i = 1; i < ints.length; ++i) {
|
|
|
|
|
ret.append(separator);
|
|
|
|
|
ret.append(ints[i]);
|
|
|
|
|
}
|
|
|
|
|
return ret.toString();
|
2012-10-02 21:39:51 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2012-10-03 05:27:09 +08:00
|
|
|
public static <T> List<T> append(final List<T> left, T ... elts) {
|
|
|
|
|
final List<T> l = new LinkedList<T>(left);
|
|
|
|
|
l.addAll(Arrays.asList(elts));
|
|
|
|
|
return l;
|
|
|
|
|
}
|
|
|
|
|
|
2012-10-02 21:39:51 +08:00
|
|
|
/**
|
|
|
|
|
* Returns a string of the values in joined by separator, such as A,B,C
|
|
|
|
|
*
|
|
|
|
|
* @param separator
|
|
|
|
|
* @param doubles
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public static String join(String separator, double[] doubles) {
|
|
|
|
|
if ( doubles == null || doubles.length == 0)
|
|
|
|
|
return "";
|
|
|
|
|
else {
|
|
|
|
|
StringBuilder ret = new StringBuilder();
|
|
|
|
|
ret.append(doubles[0]);
|
|
|
|
|
for (int i = 1; i < doubles.length; ++i) {
|
|
|
|
|
ret.append(separator);
|
|
|
|
|
ret.append(doubles[i]);
|
|
|
|
|
}
|
|
|
|
|
return ret.toString();
|
2012-06-02 07:25:11 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2011-09-21 22:15:05 +08:00
|
|
|
/**
|
|
|
|
|
* Returns a string of the form elt1.toString() [sep elt2.toString() ... sep elt.toString()] for a collection of
|
|
|
|
|
* elti objects (note there's no actual space between sep and the elti elements). Returns
|
|
|
|
|
* "" if collection is empty. If collection contains just elt, then returns elt.toString()
|
|
|
|
|
*
|
|
|
|
|
* @param separator the string to use to separate objects
|
|
|
|
|
* @param objects a collection of objects. the element order is defined by the iterator over objects
|
|
|
|
|
* @param <T> the type of the objects
|
|
|
|
|
* @return a non-null string
|
|
|
|
|
*/
|
|
|
|
|
public static <T> String join(final String separator, final Collection<T> objects) {
|
|
|
|
|
if (objects.isEmpty()) { // fast path for empty collection
|
2010-05-19 11:37:26 +08:00
|
|
|
return "";
|
2011-09-21 22:15:05 +08:00
|
|
|
} else {
|
|
|
|
|
final Iterator<T> iter = objects.iterator();
|
|
|
|
|
final T first = iter.next();
|
|
|
|
|
|
|
|
|
|
if ( ! iter.hasNext() ) // fast path for singleton collections
|
|
|
|
|
return first.toString();
|
|
|
|
|
else { // full path for 2+ collection that actually need a join
|
|
|
|
|
final StringBuilder ret = new StringBuilder(first.toString());
|
|
|
|
|
while(iter.hasNext()) {
|
|
|
|
|
ret.append(separator);
|
|
|
|
|
ret.append(iter.next().toString());
|
|
|
|
|
}
|
|
|
|
|
return ret.toString();
|
|
|
|
|
}
|
2010-05-19 11:37:26 +08:00
|
|
|
}
|
2009-03-03 02:18:48 +08:00
|
|
|
}
|
|
|
|
|
|
NA12878 knowledge base backed by MongoDB
-- Idea is simply to create a persistent database of all TP/FP sites on chr20 in NA12878. Individual callsets can be imported, and a consensus algorithm is run over all callsets in the database to create a consensus collection, which can be used to assess NA12878 callsets for GATK and methods development
-- Framework for representing simple VariantContexts and Genotypes in MongoDB, querying for records, and iterating over them in the GATK
-- Not hooked up to Tribble, but could be done reasonably easily now (future TODO)
-- Tools to import callsets, create consensus callsets, import and export reviews
-- Scripts to reset the knowledge base and repopulate it with the standard data files (Eric will expand)
-- Actually scales to all of chr20, includes AssessNA12878 that reads a VCF and itemizes it against the truth data set
-- ImportCallset can load OMNI, HM3, CEU best practices, mills/devine sites and genotypes, properly marking sites as poly/mono/unk as well as TP/FP/UNK based on command line parameters
-- Added shell scripts that start up a local mongo db, that connect to a local or BI hosted mongo for NA12878.db for debugging, and a setupNA12878db script that can load OMNI, HM3, CEU best practices, Mills/Devine into the db and then update the consensus.
-- Reviewed sites can be exported to a VCF, and imported again, as a mechanism to safely store the only non-recoverable data from the Mongo DB.
-- Created a NA12878DBWalker that manages the outer DB interaction, and that all MongoDB interacting walkers inherit from. Added a NA12878DBArgumentCollection.java consolidating all of the common command line arguments (though strictly not necessary as all of this occurs in the root walker)
UnitTests
-- Can connect to a test knowledge base for development and unit testing
-- PolymorphicStatus, TruthStatus, SiteIterator
-- NA12878KBUnitTestBase provides simple utilities for connecting to the test mongo db, getting calls, etc
-- MongoVariantContext tests creation, matching, and encoding -> writing -> read -> decoding from the mongodb
AssessNA12878
-- Generic tool for comparing a NA12878 callset against the knowledge base. See http://gatkforums.broadinstitute.org/discussion/1848/using-the-na12878-knowledge-base for detailed documentation
-- Performs trivial filtering on FS, MQ, QD for SNPs and non-SNPs to separate out variants likely to be filtered from those that are honest-to-goodness FPs
Misc
-- Ability to provide Description for Simplified GATK report
2012-11-05 06:40:17 +08:00
|
|
|
public static <T> String join(final String separator, final T ... objects) {
|
|
|
|
|
return join(separator, Arrays.asList(objects));
|
|
|
|
|
}
|
|
|
|
|
|
2009-10-23 14:31:15 +08:00
|
|
|
public static String dupString(char c, int nCopies) {
|
2009-05-22 06:23:52 +08:00
|
|
|
char[] chars = new char[nCopies];
|
2009-10-23 14:31:15 +08:00
|
|
|
Arrays.fill(chars, c);
|
2009-05-22 06:23:52 +08:00
|
|
|
return new String(chars);
|
|
|
|
|
}
|
2009-05-08 02:03:49 +08:00
|
|
|
|
2010-03-25 02:17:56 +08:00
|
|
|
public static byte[] dupBytes(byte b, int nCopies) {
|
|
|
|
|
byte[] bytes = new byte[nCopies];
|
|
|
|
|
Arrays.fill(bytes, b);
|
|
|
|
|
return bytes;
|
|
|
|
|
}
|
|
|
|
|
|
2010-01-08 01:51:41 +08:00
|
|
|
// trim a string for the given character (i.e. not just whitespace)
|
|
|
|
|
public static String trim(String str, char ch) {
|
|
|
|
|
char[] array = str.toCharArray();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int start = 0;
|
|
|
|
|
while ( start < array.length && array[start] == ch )
|
|
|
|
|
start++;
|
|
|
|
|
|
|
|
|
|
int end = array.length - 1;
|
|
|
|
|
while ( end > start && array[end] == ch )
|
|
|
|
|
end--;
|
|
|
|
|
|
|
|
|
|
return str.substring(start, end+1);
|
|
|
|
|
}
|
|
|
|
|
|
2009-05-08 02:03:49 +08:00
|
|
|
public static byte listMaxByte(List<Byte> quals) {
|
2009-10-23 14:31:15 +08:00
|
|
|
if (quals.size() == 0) return 0;
|
2009-05-08 02:03:49 +08:00
|
|
|
byte m = quals.get(0);
|
2009-10-23 14:31:15 +08:00
|
|
|
for (byte b : quals) {
|
2009-05-08 02:03:49 +08:00
|
|
|
m = b > m ? b : m;
|
|
|
|
|
}
|
|
|
|
|
return m;
|
|
|
|
|
}
|
|
|
|
|
|
2010-11-13 04:14:28 +08:00
|
|
|
/**
|
|
|
|
|
* Splits expressions in command args by spaces and returns the array of expressions.
|
|
|
|
|
* Expressions may use single or double quotes to group any individual expression, but not both.
|
|
|
|
|
* @param args Arguments to parse.
|
|
|
|
|
* @return Parsed expressions.
|
|
|
|
|
*/
|
|
|
|
|
public static String[] escapeExpressions(String args) {
|
|
|
|
|
// special case for ' and " so we can allow expressions
|
|
|
|
|
if (args.indexOf('\'') != -1)
|
|
|
|
|
return escapeExpressions(args, "'");
|
|
|
|
|
else if (args.indexOf('\"') != -1)
|
|
|
|
|
return escapeExpressions(args, "\"");
|
|
|
|
|
else
|
2011-03-26 08:41:47 +08:00
|
|
|
return args.trim().split(" +");
|
2010-11-13 04:14:28 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Splits expressions in command args by spaces and the supplied delimiter and returns the array of expressions.
|
|
|
|
|
* @param args Arguments to parse.
|
|
|
|
|
* @param delimiter Delimiter for grouping expressions.
|
|
|
|
|
* @return Parsed expressions.
|
|
|
|
|
*/
|
|
|
|
|
private static String[] escapeExpressions(String args, String delimiter) {
|
|
|
|
|
String[] command = {};
|
|
|
|
|
String[] split = args.split(delimiter);
|
2010-11-23 06:59:42 +08:00
|
|
|
String arg;
|
2010-11-13 04:14:28 +08:00
|
|
|
for (int i = 0; i < split.length - 1; i += 2) {
|
2010-11-23 06:59:42 +08:00
|
|
|
arg = split[i].trim();
|
|
|
|
|
if (arg.length() > 0) // if the unescaped arg has a size
|
2011-03-26 08:41:47 +08:00
|
|
|
command = Utils.concatArrays(command, arg.split(" +"));
|
2010-11-13 04:14:28 +08:00
|
|
|
command = Utils.concatArrays(command, new String[]{split[i + 1]});
|
|
|
|
|
}
|
2010-11-23 06:59:42 +08:00
|
|
|
arg = split[split.length - 1].trim();
|
|
|
|
|
if (split.length % 2 == 1) // if the command ends with a delimiter
|
|
|
|
|
if (arg.length() > 0) // if the last unescaped arg has a size
|
2011-03-26 08:41:47 +08:00
|
|
|
command = Utils.concatArrays(command, arg.split(" +"));
|
2010-11-23 06:59:42 +08:00
|
|
|
return command;
|
2010-11-13 04:14:28 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Concatenates two String arrays.
|
|
|
|
|
* @param A First array.
|
|
|
|
|
* @param B Second array.
|
|
|
|
|
* @return Concatenation of A then B.
|
|
|
|
|
*/
|
2009-11-24 10:34:48 +08:00
|
|
|
public static String[] concatArrays(String[] A, String[] B) {
|
|
|
|
|
String[] C = new String[A.length + B.length];
|
|
|
|
|
System.arraycopy(A, 0, C, 0, A.length);
|
|
|
|
|
System.arraycopy(B, 0, C, A.length, B.length);
|
|
|
|
|
return C;
|
|
|
|
|
}
|
|
|
|
|
|
2010-11-13 04:14:28 +08:00
|
|
|
/**
|
|
|
|
|
* Appends String(s) B to array A.
|
|
|
|
|
* @param A First array.
|
|
|
|
|
* @param B Strings to append.
|
|
|
|
|
* @return A with B(s) appended.
|
|
|
|
|
*/
|
|
|
|
|
public static String[] appendArray(String[] A, String... B) {
|
|
|
|
|
return concatArrays(A, B);
|
|
|
|
|
}
|
2009-11-24 10:34:48 +08:00
|
|
|
|
2010-11-13 04:14:28 +08:00
|
|
|
/**
|
|
|
|
|
* Returns indices of all occurrences of the specified symbol in the string
|
|
|
|
|
* @param s Search string
|
|
|
|
|
* @param ch Character to search for
|
|
|
|
|
* @return Indices of all occurrences of the specified symbol
|
|
|
|
|
*/
|
2009-05-30 04:15:00 +08:00
|
|
|
public static int[] indexOfAll(String s, int ch) {
|
2009-10-23 14:31:15 +08:00
|
|
|
int[] pos = new int[64];
|
|
|
|
|
int z = 0;
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < s.length(); i++) {
|
|
|
|
|
if (s.charAt(i) == ch) pos[z++] = i;
|
|
|
|
|
}
|
|
|
|
|
return reallocate(pos, z);
|
|
|
|
|
}
|
|
|
|
|
|
2011-12-15 04:31:09 +08:00
|
|
|
public static int countSetBits(boolean[] array) {
|
|
|
|
|
int counter = 0;
|
|
|
|
|
for ( int i = 0; i < array.length; i++ ) {
|
|
|
|
|
if ( array[i] )
|
|
|
|
|
counter++;
|
|
|
|
|
}
|
|
|
|
|
return counter;
|
|
|
|
|
}
|
|
|
|
|
|
2009-10-23 14:31:15 +08:00
|
|
|
/**
|
|
|
|
|
* Returns new (reallocated) integer array of the specified size, with content
|
2009-05-30 04:15:00 +08:00
|
|
|
* of the original array <code>orig</code> copied into it. If <code>newSize</code> is
|
|
|
|
|
* less than the size of the original array, only first <code>newSize</code> elements will be copied.
|
|
|
|
|
* If new size is greater than the size of the original array, the content of the original array will be padded
|
2009-10-23 14:31:15 +08:00
|
|
|
* with zeros up to the new size. Finally, if new size is the same as original size, no memory reallocation
|
|
|
|
|
* will be performed and the original array will be returned instead.
|
|
|
|
|
*
|
2010-11-13 04:14:28 +08:00
|
|
|
* @param orig Original size.
|
|
|
|
|
* @param newSize New Size.
|
2009-10-23 14:31:15 +08:00
|
|
|
*
|
2010-11-13 04:14:28 +08:00
|
|
|
* @return New array with length equal to newSize.
|
2009-05-30 04:15:00 +08:00
|
|
|
*/
|
|
|
|
|
public static int[] reallocate(int[] orig, int newSize) {
|
2009-10-23 14:31:15 +08:00
|
|
|
if (orig.length == newSize) return orig;
|
|
|
|
|
int[] new_array = new int[newSize];
|
|
|
|
|
int L = (newSize > orig.length ? orig.length : newSize);
|
|
|
|
|
for (int i = 0; i < L; i++) new_array[i] = orig[i];
|
|
|
|
|
return new_array;
|
2009-05-30 04:15:00 +08:00
|
|
|
}
|
2009-10-23 14:31:15 +08:00
|
|
|
|
2010-08-04 23:30:48 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Returns a copy of array a, extended with additional n elements to the right (if n > 0 ) or -n elements to the
|
|
|
|
|
* left (if n<0), copying the values form the original array. Newly added elements are filled with value v. Note that
|
|
|
|
|
* if array a is being padded to the left, first (-n) elements of the returned array are v's, followed by the content of
|
|
|
|
|
* array a.
|
|
|
|
|
* @param a original array
|
|
|
|
|
* @param n number of (v-filled) elements to append to a on the right (n>0) or on the left (n<0)
|
2010-11-13 04:14:28 +08:00
|
|
|
* @param v element value
|
|
|
|
|
* @return the extended copy of array a with additional n elements
|
2010-08-04 23:30:48 +08:00
|
|
|
*/
|
|
|
|
|
public static byte [] extend(final byte[] a, int n, byte v) {
|
|
|
|
|
|
|
|
|
|
byte [] newA;
|
|
|
|
|
|
|
|
|
|
if ( n > 0 ) {
|
|
|
|
|
newA = Arrays.copyOf(a, a.length+n);
|
|
|
|
|
if ( v != 0) { // java pads with 0's for us, so there is nothing to do if v==0
|
|
|
|
|
for ( int i = a.length; i < newA.length ; i++ ) newA[i] = v;
|
|
|
|
|
}
|
|
|
|
|
return newA;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// we are here only if n < 0:
|
|
|
|
|
n = (-n);
|
|
|
|
|
newA = new byte[ a.length + n ];
|
|
|
|
|
int i;
|
|
|
|
|
if ( v!= 0 ) {
|
|
|
|
|
i = 0;
|
|
|
|
|
for( ; i < n; i++ ) newA[i] = v;
|
|
|
|
|
} else {
|
|
|
|
|
i = n;
|
|
|
|
|
}
|
|
|
|
|
for ( int j = 0 ; j < a.length ; i++, j++) newA[i]=a[j];
|
|
|
|
|
return newA;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Returns a copy of array a, extended with additional n elements to the right (if n > 0 ) or -n elements to the
|
|
|
|
|
* left (if n<0), copying the values form the original array. Newly added elements are filled with value v. Note that
|
|
|
|
|
* if array a is padded to the left, first (-n) elements of the returned array are v's, followed by the content of
|
|
|
|
|
* array a.
|
|
|
|
|
* @param a original array
|
|
|
|
|
* @param n number of (v-filled) elements to append to a on the right (n>0) or on the left (n<0)
|
2010-11-13 04:14:28 +08:00
|
|
|
* @param v element value
|
|
|
|
|
* @return the extended copy of array a with additional n elements
|
2010-08-04 23:30:48 +08:00
|
|
|
*/
|
|
|
|
|
public static short [] extend(final short[] a, int n, short v) {
|
|
|
|
|
|
|
|
|
|
short [] newA;
|
|
|
|
|
|
|
|
|
|
if ( n > 0 ) {
|
|
|
|
|
newA = Arrays.copyOf(a, a.length+n);
|
|
|
|
|
if ( v != 0) { // java pads with 0's for us, so there is nothing to do if v==0
|
|
|
|
|
for ( int i = a.length; i < newA.length ; i++ ) newA[i] = v;
|
|
|
|
|
}
|
|
|
|
|
return newA;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// we are here only if n < 0:
|
|
|
|
|
n = (-n);
|
|
|
|
|
newA = new short[ a.length + n ];
|
|
|
|
|
int i;
|
|
|
|
|
if ( v!= 0 ) {
|
|
|
|
|
i = 0;
|
|
|
|
|
for( ; i < n; i++ ) newA[i] = v;
|
|
|
|
|
} else {
|
|
|
|
|
i = n;
|
|
|
|
|
}
|
|
|
|
|
for ( int j = 0 ; j < a.length ; i++, j++) newA[i]=a[j];
|
|
|
|
|
return newA;
|
|
|
|
|
}
|
|
|
|
|
|
2009-04-17 23:15:21 +08:00
|
|
|
/* TEST ME
|
2009-10-23 14:31:15 +08:00
|
|
|
public static void main(String[] argv) {
|
|
|
|
|
List<Integer> l1 = new LinkedList<Integer>();
|
|
|
|
|
List<Integer> l2 = new ArrayList<Integer>();
|
|
|
|
|
|
|
|
|
|
l1.add(1);
|
|
|
|
|
l1.add(5);
|
|
|
|
|
l1.add(3);
|
|
|
|
|
l1.add(10);
|
|
|
|
|
l1.add(4);
|
|
|
|
|
l1.add(2);
|
|
|
|
|
l2.add(1);
|
|
|
|
|
l2.add(5);
|
|
|
|
|
l2.add(3);
|
|
|
|
|
l2.add(10);
|
|
|
|
|
l2.add(4);
|
|
|
|
|
l2.add(2);
|
|
|
|
|
|
|
|
|
|
Predicate<Integer> p = new Predicate<Integer>() {
|
|
|
|
|
public boolean apply(Integer i) {
|
|
|
|
|
return i > 2;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
filterInPlace(p, l1);
|
|
|
|
|
filterInPlace(p, l2);
|
|
|
|
|
|
|
|
|
|
for ( int i = 0 ; i < l1.size(); i++ ) System.out.print(" "+l1.get(i));
|
|
|
|
|
System.out.println();
|
|
|
|
|
for ( int i = 0 ; i < l2.size(); i++ ) System.out.print(" " + l2.get(i));
|
|
|
|
|
System.out.println();
|
|
|
|
|
|
|
|
|
|
}
|
2009-03-18 03:06:40 +08:00
|
|
|
|
2009-10-23 14:31:15 +08:00
|
|
|
*/
|
2009-03-18 03:06:40 +08:00
|
|
|
|
2009-10-23 14:31:15 +08:00
|
|
|
/**
|
|
|
|
|
* a helper method. Turns a single character string into a char.
|
|
|
|
|
*
|
|
|
|
|
* @param str the string
|
|
|
|
|
*
|
|
|
|
|
* @return a char
|
|
|
|
|
*/
|
|
|
|
|
public static char stringToChar(String str) {
|
|
|
|
|
if (str.length() != 1) throw new IllegalArgumentException("String length must be one");
|
|
|
|
|
return str.charAt(0);
|
2009-03-18 03:06:40 +08:00
|
|
|
}
|
|
|
|
|
|
2010-02-05 23:42:54 +08:00
|
|
|
public static <T extends Comparable<T>> List<T> sorted(Collection<T> c) {
|
2010-02-10 03:02:25 +08:00
|
|
|
return sorted(c, false);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static <T extends Comparable<T>> List<T> sorted(Collection<T> c, boolean reverse) {
|
2010-02-05 23:42:54 +08:00
|
|
|
List<T> l = new ArrayList<T>(c);
|
|
|
|
|
Collections.sort(l);
|
2010-02-10 03:02:25 +08:00
|
|
|
if ( reverse ) Collections.reverse(l);
|
2010-02-05 23:42:54 +08:00
|
|
|
return l;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static <T extends Comparable<T>, V> List<V> sorted(Map<T,V> c) {
|
2010-02-10 03:02:25 +08:00
|
|
|
return sorted(c, false);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static <T extends Comparable<T>, V> List<V> sorted(Map<T,V> c, boolean reverse) {
|
2010-02-05 23:42:54 +08:00
|
|
|
List<T> t = new ArrayList<T>(c.keySet());
|
|
|
|
|
Collections.sort(t);
|
2010-02-10 03:02:25 +08:00
|
|
|
if ( reverse ) Collections.reverse(t);
|
2010-02-05 23:42:54 +08:00
|
|
|
|
|
|
|
|
List<V> l = new ArrayList<V>();
|
|
|
|
|
for ( T k : t ) {
|
|
|
|
|
l.add(c.get(k));
|
|
|
|
|
}
|
|
|
|
|
return l;
|
|
|
|
|
}
|
2009-10-23 14:31:15 +08:00
|
|
|
|
2010-02-05 23:42:54 +08:00
|
|
|
public static <T extends Comparable<T>, V> String sortedString(Map<T,V> c) {
|
|
|
|
|
List<T> t = new ArrayList<T>(c.keySet());
|
|
|
|
|
Collections.sort(t);
|
|
|
|
|
|
|
|
|
|
List<String> pairs = new ArrayList<String>();
|
|
|
|
|
for ( T k : t ) {
|
|
|
|
|
pairs.add(k + "=" + c.get(k));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return "{" + join(", ", pairs) + "}";
|
|
|
|
|
}
|
2010-05-20 22:05:13 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Reverse a byte array of bases
|
|
|
|
|
*
|
|
|
|
|
* @param bases the byte array of bases
|
|
|
|
|
* @return the reverse of the base byte array
|
|
|
|
|
*/
|
|
|
|
|
static public byte[] reverse(byte[] bases) {
|
|
|
|
|
byte[] rcbases = new byte[bases.length];
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < bases.length; i++) {
|
|
|
|
|
rcbases[i] = bases[bases.length - i - 1];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return rcbases;
|
|
|
|
|
}
|
|
|
|
|
|
2011-11-02 22:49:40 +08:00
|
|
|
static public final <T> List<T> reverse(final List<T> l) {
|
|
|
|
|
final List<T> newL = new ArrayList<T>(l);
|
|
|
|
|
Collections.reverse(newL);
|
|
|
|
|
return newL;
|
|
|
|
|
}
|
|
|
|
|
|
2010-05-20 22:05:13 +08:00
|
|
|
/**
|
|
|
|
|
* Reverse an int array of bases
|
|
|
|
|
*
|
|
|
|
|
* @param bases the int array of bases
|
|
|
|
|
* @return the reverse of the base int array
|
|
|
|
|
*/
|
|
|
|
|
static public int[] reverse(int[] bases) {
|
|
|
|
|
int[] rcbases = new int[bases.length];
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < bases.length; i++) {
|
|
|
|
|
rcbases[i] = bases[bases.length - i - 1];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return rcbases;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Reverse (NOT reverse-complement!!) a string
|
|
|
|
|
*
|
|
|
|
|
* @param bases input string
|
|
|
|
|
* @return the reversed string
|
|
|
|
|
*/
|
|
|
|
|
static public String reverse(String bases) {
|
|
|
|
|
return new String( reverse( bases.getBytes() )) ;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static byte[] charSeq2byteSeq(char[] seqIn) {
|
|
|
|
|
byte[] seqOut = new byte[seqIn.length];
|
|
|
|
|
for ( int i = 0; i < seqIn.length; i++ ) {
|
|
|
|
|
seqOut[i] = (byte)seqIn[i];
|
|
|
|
|
}
|
|
|
|
|
return seqOut;
|
2011-01-21 06:34:43 +08:00
|
|
|
}
|
2009-03-12 05:26:29 +08:00
|
|
|
|
2011-01-21 06:34:43 +08:00
|
|
|
public static boolean isFlagSet(int value, int flag) {
|
|
|
|
|
return ((value & flag) == flag);
|
|
|
|
|
}
|
2011-08-29 00:04:16 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Helper utility that calls into the InetAddress system to resolve the hostname. If this fails,
|
|
|
|
|
* unresolvable gets returned instead.
|
|
|
|
|
*
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public static final String resolveHostname() {
|
|
|
|
|
try {
|
|
|
|
|
return InetAddress.getLocalHost().getCanonicalHostName();
|
|
|
|
|
}
|
|
|
|
|
catch (java.net.UnknownHostException uhe) { // [beware typo in code sample -dmw]
|
|
|
|
|
return "unresolvable";
|
|
|
|
|
// handle exception
|
|
|
|
|
}
|
|
|
|
|
}
|
2011-12-16 02:09:46 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
public static byte [] arrayFromArrayWithLength(byte[] array, int length) {
|
|
|
|
|
byte [] output = new byte[length];
|
|
|
|
|
for (int j = 0; j < length; j++)
|
|
|
|
|
output[j] = array[(j % array.length)];
|
|
|
|
|
return output;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static void fillArrayWithByte(byte[] array, byte value) {
|
|
|
|
|
for (int i=0; i<array.length; i++)
|
|
|
|
|
array[i] = value;
|
|
|
|
|
}
|
|
|
|
|
|
2012-11-14 04:21:57 +08:00
|
|
|
/**
|
|
|
|
|
* Creates a program record for the program, adds it to the list of program records (@PG tags) in the bam file and sets
|
|
|
|
|
* up the writer with the header and presorted status.
|
|
|
|
|
*
|
|
|
|
|
* @param toolkit the engine
|
|
|
|
|
* @param originalHeader original header
|
|
|
|
|
* @param KEEP_ALL_PG_RECORDS whether or not to keep all the other program records already existing in this BAM file
|
|
|
|
|
* @param programRecord the program record for this program
|
|
|
|
|
*/
|
|
|
|
|
public static SAMFileHeader setupWriter(GenomeAnalysisEngine toolkit, SAMFileHeader originalHeader, boolean KEEP_ALL_PG_RECORDS, SAMProgramRecord programRecord) {
|
|
|
|
|
SAMFileHeader header = originalHeader.clone();
|
2012-03-17 02:09:07 +08:00
|
|
|
List<SAMProgramRecord> oldRecords = header.getProgramRecords();
|
|
|
|
|
List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
|
|
|
|
|
for ( SAMProgramRecord record : oldRecords )
|
2012-11-27 00:12:27 +08:00
|
|
|
if ( (programRecord != null && !record.getId().startsWith(programRecord.getId())) || KEEP_ALL_PG_RECORDS )
|
2012-03-17 02:09:07 +08:00
|
|
|
newRecords.add(record);
|
|
|
|
|
|
2012-11-27 00:12:27 +08:00
|
|
|
if (programRecord != null) {
|
|
|
|
|
newRecords.add(programRecord);
|
|
|
|
|
header.setProgramRecords(newRecords);
|
|
|
|
|
}
|
2012-11-14 04:21:57 +08:00
|
|
|
return header;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Creates a program record for the program, adds it to the list of program records (@PG tags) in the bam file and returns
|
|
|
|
|
* the new header to be added to the BAM writer.
|
|
|
|
|
*
|
|
|
|
|
* @param toolkit the engine
|
|
|
|
|
* @param KEEP_ALL_PG_RECORDS whether or not to keep all the other program records already existing in this BAM file
|
|
|
|
|
* @param walker the walker object (so we can extract the command line)
|
|
|
|
|
* @param PROGRAM_RECORD_NAME the name for the PG tag
|
|
|
|
|
* @return a pre-filled header for the bam writer
|
|
|
|
|
*/
|
|
|
|
|
public static SAMFileHeader setupWriter(GenomeAnalysisEngine toolkit, SAMFileHeader originalHeader, boolean KEEP_ALL_PG_RECORDS, Object walker, String PROGRAM_RECORD_NAME) {
|
|
|
|
|
final SAMProgramRecord programRecord = createProgramRecord(toolkit, walker, PROGRAM_RECORD_NAME);
|
|
|
|
|
return setupWriter(toolkit, originalHeader, KEEP_ALL_PG_RECORDS, programRecord);
|
|
|
|
|
}
|
2012-03-17 02:09:07 +08:00
|
|
|
|
2012-11-14 04:21:57 +08:00
|
|
|
/**
|
|
|
|
|
* Creates a program record for the program, adds it to the list of program records (@PG tags) in the bam file and sets
|
|
|
|
|
* up the writer with the header and presorted status.
|
|
|
|
|
*
|
|
|
|
|
* @param writer BAM file writer
|
|
|
|
|
* @param toolkit the engine
|
|
|
|
|
* @param preSorted whether or not the writer can assume reads are going to be added are already sorted
|
|
|
|
|
* @param KEEP_ALL_PG_RECORDS whether or not to keep all the other program records already existing in this BAM file
|
|
|
|
|
* @param walker the walker object (so we can extract the command line)
|
|
|
|
|
* @param PROGRAM_RECORD_NAME the name for the PG tag
|
|
|
|
|
*/
|
|
|
|
|
public static void setupWriter(StingSAMFileWriter writer, GenomeAnalysisEngine toolkit, SAMFileHeader originalHeader, boolean preSorted, boolean KEEP_ALL_PG_RECORDS, Object walker, String PROGRAM_RECORD_NAME) {
|
|
|
|
|
SAMFileHeader header = setupWriter(toolkit, originalHeader, KEEP_ALL_PG_RECORDS, walker, PROGRAM_RECORD_NAME);
|
2012-03-17 02:09:07 +08:00
|
|
|
writer.writeHeader(header);
|
|
|
|
|
writer.setPresorted(preSorted);
|
|
|
|
|
}
|
2012-11-14 04:21:57 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Creates a program record (@PG) tag
|
|
|
|
|
*
|
|
|
|
|
* @param toolkit the engine
|
|
|
|
|
* @param walker the walker object (so we can extract the command line)
|
|
|
|
|
* @param PROGRAM_RECORD_NAME the name for the PG tag
|
|
|
|
|
* @return a program record for the tool
|
|
|
|
|
*/
|
2012-03-17 02:09:07 +08:00
|
|
|
public static SAMProgramRecord createProgramRecord(GenomeAnalysisEngine toolkit, Object walker, String PROGRAM_RECORD_NAME) {
|
|
|
|
|
final SAMProgramRecord programRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME);
|
|
|
|
|
final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText");
|
|
|
|
|
try {
|
|
|
|
|
final String version = headerInfo.getString("org.broadinstitute.sting.gatk.version");
|
|
|
|
|
programRecord.setProgramVersion(version);
|
|
|
|
|
} catch (MissingResourceException e) {
|
|
|
|
|
// couldn't care less if the resource is missing...
|
|
|
|
|
}
|
|
|
|
|
programRecord.setCommandLine(toolkit.createApproximateCommandLineArgumentString(toolkit, walker));
|
|
|
|
|
return programRecord;
|
|
|
|
|
}
|
|
|
|
|
|
2012-03-27 23:54:58 +08:00
|
|
|
public static <E> Collection<E> makeCollection(Iterable<E> iter) {
|
|
|
|
|
Collection<E> list = new ArrayList<E>();
|
|
|
|
|
for (E item : iter) {
|
|
|
|
|
list.add(item);
|
|
|
|
|
}
|
|
|
|
|
return list;
|
|
|
|
|
}
|
2012-03-29 00:55:29 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Returns the number of combinations represented by this collection
|
|
|
|
|
* of collection of options.
|
|
|
|
|
*
|
|
|
|
|
* For example, if this is [[A, B], [C, D], [E, F, G]] returns 2 * 2 * 3 = 12
|
|
|
|
|
*
|
|
|
|
|
* @param options
|
|
|
|
|
* @param <T>
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
@Requires("options != null")
|
|
|
|
|
public static <T> int nCombinations(final Collection<T>[] options) {
|
|
|
|
|
int nStates = 1;
|
|
|
|
|
for ( Collection<T> states : options ) {
|
|
|
|
|
nStates *= states.size();
|
|
|
|
|
}
|
|
|
|
|
return nStates;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Requires("options != null")
|
|
|
|
|
public static <T> int nCombinations(final List<List<T>> options) {
|
|
|
|
|
if ( options.isEmpty() )
|
|
|
|
|
return 0;
|
|
|
|
|
else {
|
|
|
|
|
int nStates = 1;
|
|
|
|
|
for ( Collection<T> states : options ) {
|
|
|
|
|
nStates *= states.size();
|
|
|
|
|
}
|
|
|
|
|
return nStates;
|
|
|
|
|
}
|
|
|
|
|
}
|
2012-03-31 03:28:47 +08:00
|
|
|
|
2012-08-15 03:02:45 +08:00
|
|
|
/**
|
|
|
|
|
* Make all combinations of N size of objects
|
|
|
|
|
*
|
|
|
|
|
* if objects = [A, B, C]
|
|
|
|
|
* if N = 1 => [[A], [B], [C]]
|
|
|
|
|
* if N = 2 => [[A, A], [B, A], [C, A], [A, B], [B, B], [C, B], [A, C], [B, C], [C, C]]
|
|
|
|
|
*
|
|
|
|
|
* @param objects
|
|
|
|
|
* @param n
|
|
|
|
|
* @param <T>
|
2012-08-16 02:36:06 +08:00
|
|
|
* @param withReplacement if false, the resulting permutations will only contain unique objects from objects
|
2012-08-15 03:02:45 +08:00
|
|
|
* @return
|
|
|
|
|
*/
|
2012-08-16 02:36:06 +08:00
|
|
|
public static <T> List<List<T>> makePermutations(final List<T> objects, final int n, final boolean withReplacement) {
|
2012-08-15 03:02:45 +08:00
|
|
|
final List<List<T>> combinations = new ArrayList<List<T>>();
|
|
|
|
|
|
2012-08-16 02:36:06 +08:00
|
|
|
if ( n <= 0 )
|
|
|
|
|
;
|
|
|
|
|
else if ( n == 1 ) {
|
2012-08-15 03:02:45 +08:00
|
|
|
for ( final T o : objects )
|
|
|
|
|
combinations.add(Collections.singletonList(o));
|
|
|
|
|
} else {
|
2012-08-16 02:36:06 +08:00
|
|
|
final List<List<T>> sub = makePermutations(objects, n - 1, withReplacement);
|
2012-08-15 03:02:45 +08:00
|
|
|
for ( List<T> subI : sub ) {
|
|
|
|
|
for ( final T a : objects ) {
|
2012-08-16 02:36:06 +08:00
|
|
|
if ( withReplacement || ! subI.contains(a) )
|
|
|
|
|
combinations.add(Utils.cons(a, subI));
|
2012-08-15 03:02:45 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return combinations;
|
|
|
|
|
}
|
|
|
|
|
|
2012-03-31 03:28:47 +08:00
|
|
|
/**
|
|
|
|
|
* Convenience function that formats the novelty rate as a %.2f string
|
|
|
|
|
*
|
|
|
|
|
* @param known number of variants from all that are known
|
|
|
|
|
* @param all number of all variants
|
|
|
|
|
* @return a String novelty rate, or NA if all == 0
|
|
|
|
|
*/
|
|
|
|
|
public static String formattedNoveltyRate(final int known, final int all) {
|
|
|
|
|
return formattedPercent(all - known, all);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convenience function that formats the novelty rate as a %.2f string
|
|
|
|
|
*
|
|
|
|
|
* @param x number of objects part of total that meet some criteria
|
|
|
|
|
* @param total count of all objects, including x
|
|
|
|
|
* @return a String percent rate, or NA if total == 0
|
|
|
|
|
*/
|
|
|
|
|
public static String formattedPercent(final long x, final long total) {
|
|
|
|
|
return total == 0 ? "NA" : String.format("%.2f", (100.0*x) / total);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convenience function that formats a ratio as a %.2f string
|
|
|
|
|
*
|
|
|
|
|
* @param num number of observations in the numerator
|
|
|
|
|
* @param denom number of observations in the denumerator
|
|
|
|
|
* @return a String formatted ratio, or NA if all == 0
|
|
|
|
|
*/
|
|
|
|
|
public static String formattedRatio(final long num, final long denom) {
|
|
|
|
|
return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom));
|
|
|
|
|
}
|
2012-04-11 21:41:45 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Create a constant map that maps each value in values to itself
|
|
|
|
|
* @param values
|
|
|
|
|
* @param <T>
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public static <T> Map<T, T> makeIdentityFunctionMap(Collection<T> values) {
|
|
|
|
|
Map<T,T> map = new HashMap<T, T>(values.size());
|
|
|
|
|
for ( final T value : values )
|
|
|
|
|
map.put(value, value);
|
|
|
|
|
return Collections.unmodifiableMap(map);
|
|
|
|
|
}
|
|
|
|
|
|
2012-08-25 03:34:23 +08:00
|
|
|
/**
|
|
|
|
|
* Divides the input list into a list of sublists, which contains group size elements (except potentially the last one)
|
|
|
|
|
*
|
|
|
|
|
* list = [A, B, C, D, E]
|
|
|
|
|
* groupSize = 2
|
|
|
|
|
* result = [[A, B], [C, D], [E]]
|
|
|
|
|
*
|
|
|
|
|
* @param list
|
|
|
|
|
* @param groupSize
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public static <T> List<List<T>> groupList(final List<T> list, final int groupSize) {
|
|
|
|
|
if ( groupSize < 1 ) throw new IllegalArgumentException("groupSize >= 1");
|
|
|
|
|
|
|
|
|
|
final List<List<T>> subLists = new LinkedList<List<T>>();
|
|
|
|
|
int n = list.size();
|
|
|
|
|
for ( int i = 0; i < n; i += groupSize ) {
|
|
|
|
|
subLists.add(list.subList(i, Math.min(i + groupSize, n)));
|
|
|
|
|
}
|
|
|
|
|
return subLists;
|
|
|
|
|
}
|
2012-11-14 04:21:57 +08:00
|
|
|
|
2011-01-21 06:34:43 +08:00
|
|
|
}
|