Added ReadLikelihoods component to substitute Map<String,PerReadAlleleLikelihoodMap>.

It uses a more efficient Java array-based implementation and encapsulates the operations performed on such a
read-likelihood collection, such as marginalization, filtering by position or by poor modeling, capping
worst likelihoods, and so forth.

Stories:

          https://www.pivotaltracker.com/story/show/70222086
          https://www.pivotaltracker.com/story/show/67961652
This commit is contained in:
Valentin Ruano-Rubio 2014-07-24 23:42:53 -04:00
parent c56e493f98
commit 09ac3779d6
2 changed files with 1757 additions and 0 deletions

View File

@ -34,6 +34,7 @@ import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.io.GATKSAMFileWriter;
import org.broadinstitute.gatk.utils.text.TextFormattingUtils;
import java.lang.reflect.Array;
import java.math.BigInteger;
import java.net.InetAddress;
import java.security.MessageDigest;
@ -882,4 +883,258 @@ public class Utils {
return false;
return true;
}
/**
 * Skims out positions of an array, returning a new array with the remaining positions in the same order.
 *
 * <p>
 * This is a convenience overload that considers the whole {@code original} array, reads the removal
 * flags from the start of {@code remove}, and always requests a freshly allocated result array.
 * </p>
 *
 * @param original the original array to splice.
 * @param remove for each position in {@code original} indicates whether it should be spliced away
 *               ({@code true}) or retained ({@code false}).
 *
 * @param <T> the array type.
 *
 * @throws IllegalArgumentException if either {@code original} or {@code remove} is {@code null},
 *    or {@code original} is not in fact an array (as enforced by the overloads this method delegates to).
 *
 * @return never {@code null}.
 */
public static <T> T skimArray(final T original, final boolean[] remove) {
    // No destination is supplied, so the delegate has to allocate the result itself.
    final T noDestination = null;
    final int firstPosition = 0;
    return skimArray(original, firstPosition, noDestination, firstPosition, remove, firstPosition);
}
/**
 * Skims out positions of an array, returning the remaining positions in the same order.
 *
 * <p>
 * Every position of {@code source} from {@code sourceOffset} up to its end is considered; the actual
 * skimming is delegated to the overload that takes an explicit length.
 * </p>
 *
 * @param source the original array to splice.
 * @param sourceOffset the first position to skim.
 * @param dest the destination array; may be {@code null}.
 * @param destOffset the first position where to copy the skimmed array values.
 * @param remove for each considered position in {@code source} indicates whether it should be spliced
 *               away ({@code true}) or retained ({@code false}).
 * @param removeOffset the first position in the {@code remove} array to consider.
 *
 * @param <T> the array type.
 *
 * @throws IllegalArgumentException if {@code source} is {@code null} or is not in fact an array,
 *    if {@code sourceOffset} goes beyond the end of {@code source}, or if the overload this method
 *    delegates to rejects any of the remaining arguments.
 *
 * @return never {@code null}.
 */
public static <T> T skimArray(final T source, final int sourceOffset, final T dest, final int destOffset, final boolean[] remove, final int removeOffset) {
    if (source == null) {
        throw new IllegalArgumentException("the source array cannot be null");
    }
    if (!source.getClass().isArray()) {
        throw new IllegalArgumentException("the source array is not in fact an array instance");
    }
    // Everything from sourceOffset to the end of the source array is subject to skimming.
    final int remaining = Array.getLength(source) - sourceOffset;
    if (remaining < 0) {
        throw new IllegalArgumentException("the source offset goes beyond the source array length");
    }
    return skimArray(source, sourceOffset, dest, destOffset, remove, removeOffset, remaining);
}
/**
 * Skims out positions of an array, returning the remaining positions in the same order.
 *
 * <p>
 * Only the region {@code sourceOffset} to {@code sourceOffset + length - 1} of {@code source} is
 * considered. The retained elements are written consecutively into the result starting at
 * {@code destOffset}.
 * </p>
 *
 * <p>
 * If {@code dest} is {@code null}, a brand-new array with the component type of {@code source} and just
 * large enough is created; the positions preceding {@code destOffset} are left with the default value.
 * If {@code dest} is provided but is not long enough, a new array with the component type of
 * {@code dest} is created and the elements of {@code dest} that precede {@code destOffset} are copied
 * into it. Otherwise {@code dest} itself is filled in and returned.
 * </p>
 *
 * <p>
 * Removal flags are read from {@code remove} starting at {@code removeOffset}. If fewer than
 * {@code length} flags are available, the positions without a flag are retained.
 * </p>
 *
 * @param source the original array to splice.
 * @param sourceOffset the first position to skim.
 * @param dest the destination array; may be {@code null}.
 * @param destOffset the first position where to copy the skimmed array values.
 * @param remove for each considered position in {@code source} indicates whether it should be spliced
 *               away ({@code true}) or retained ({@code false}).
 * @param removeOffset the first position in the {@code remove} array to consider.
 * @param length the total number of positions in {@code source} to consider.
 *
 * @param <T> the array type.
 *
 * @throws IllegalArgumentException if either {@code source} or {@code remove} is {@code null},
 *    if any offset or {@code length} is negative, if {@code removeOffset} falls beyond the end of
 *    {@code remove}, if {@code source} is not in fact an array or is too short for the requested region,
 *    or if {@code dest} is not an array whose component type can hold the elements of {@code source}.
 *
 * @return never {@code null}.
 */
public static <T> T skimArray(final T source, final int sourceOffset, final T dest, final int destOffset,
                              final boolean[] remove, final int removeOffset, final int length) {
    if (source == null)
        throw new IllegalArgumentException("the source array cannot be null");
    if (remove == null)
        throw new IllegalArgumentException("the remove array cannot be null");
    if (sourceOffset < 0)
        throw new IllegalArgumentException("the source array offset cannot be negative");
    if (destOffset < 0)
        throw new IllegalArgumentException("the destination array offset cannot be negative");
    if (removeOffset < 0)
        throw new IllegalArgumentException("the remove array offset cannot be negative");
    if (length < 0)
        throw new IllegalArgumentException("the length provided cannot be negative");
    // Number of considered positions that actually have a removal flag.
    final int removeLength = Math.min(remove.length - removeOffset, length);
    if (removeLength < 0)
        throw new IllegalArgumentException("the remove offset provided falls beyond the remove array end");
    @SuppressWarnings("unchecked")
    final Class<T> sourceClazz = (Class<T>) source.getClass();
    if (!sourceClazz.isArray())
        throw new IllegalArgumentException("the source array is not in fact an array instance");
    final Class<T> destClazz = skimArrayDetermineDestArrayClass(dest, sourceClazz);
    // Compared by subtraction (both operands are non-negative) so a huge length cannot overflow a sum.
    if (Array.getLength(source) - sourceOffset < length)
        throw new IllegalArgumentException("the source array is too small considering length and offset");

    // Count how many positions are to be removed.
    int removeCount = 0;
    final int removeEnd = removeOffset + removeLength;
    for (int i = removeOffset; i < removeEnd; i++)
        if (remove[i]) removeCount++;
    final int newLength = length - removeCount;
    final T result = skimArrayBuildResultArray(dest, destOffset, destClazz, newLength);

    // No removals, just copy the whole thing.
    if (removeCount == 0) {
        System.arraycopy(source, sourceOffset, result, destOffset, length);
        return result;
    }

    // Alternate between skipping a run of removed positions and bulk-copying a run of retained ones.
    // A position is removed only if it has a flag (index < removeLength) and that flag is set.
    int next = 0;               // next position to examine, relative to sourceOffset.
    int writeIndex = destOffset; // next position to fill in the result.
    while (next < length) {
        while (next < removeLength && remove[removeOffset + next])
            next++;
        final int runStart = next;
        while (next < length && !(next < removeLength && remove[removeOffset + next]))
            next++;
        final int runLength = next - runStart;
        if (runLength > 0) {
            System.arraycopy(source, sourceOffset + runStart, result, writeIndex, runLength);
            writeIndex += runLength;
        }
    }
    return result;
}

/**
 * Chooses the array that will receive the skimmed values: {@code dest} itself when it is long enough,
 * otherwise a newly allocated array of {@code destClazz}'s component type.
 *
 * <p>
 * When a replacement for a too-short {@code dest} is allocated, the elements of {@code dest} preceding
 * {@code destOffset} are carried over; if {@code dest} is even shorter than {@code destOffset}, only the
 * elements it actually has are copied and the rest keep the default value.
 * </p>
 *
 * @param dest the destination array provided by the caller; may be {@code null}.
 * @param destOffset first position of the result that will receive skimmed values.
 * @param destClazz the array class to instantiate if a new array is needed.
 * @param newLength number of skimmed values to accommodate.
 * @param <T> the array type.
 *
 * @return never {@code null}.
 */
@SuppressWarnings("unchecked")
private static <T> T skimArrayBuildResultArray(final T dest, final int destOffset, final Class<T> destClazz, final int newLength) {
    final int requiredLength = newLength + destOffset;
    if (dest == null)
        return (T) Array.newInstance(destClazz.getComponentType(), requiredLength);
    final int destLength = Array.getLength(dest);
    if (destLength >= requiredLength)
        return dest;
    final T result = (T) Array.newInstance(destClazz.getComponentType(), requiredLength);
    // Never read past the end of a destination that is shorter than destOffset.
    final int prefixLength = Math.min(destOffset, destLength);
    if (prefixLength > 0)
        System.arraycopy(dest, 0, result, 0, prefixLength);
    return result;
}

/**
 * Resolves the array class of the result and checks that it can hold the elements of the source.
 *
 * @param dest the destination array provided by the caller; may be {@code null}.
 * @param sourceClazz the class of the source array.
 * @param <T> the array type.
 *
 * @throws IllegalArgumentException if {@code dest} is not an array, or its component type is not
 *    assignable from the component type of the source array.
 *
 * @return {@code sourceClazz} if {@code dest} is {@code null}, the class of {@code dest} otherwise.
 */
@SuppressWarnings("unchecked")
private static <T> Class<T> skimArrayDetermineDestArrayClass(final T dest, final Class<T> sourceClazz) {
    if (dest == null)
        return sourceClazz;
    final Class<T> destClazz = (Class<T>) dest.getClass();
    if (destClazz != sourceClazz) {
        if (!destClazz.isArray())
            throw new IllegalArgumentException("the destination array class must be an array");
        // The destination must be able to store source elements, i.e. its component type has to be
        // the same as, or a supertype of, the source component type.
        if (!destClazz.getComponentType().isAssignableFrom(sourceClazz.getComponentType()))
            throw new IllegalArgumentException("the provided destination array class cannot contain values from the source due to type incompatibility");
    }
    return destClazz;
}
/**
 * Makes a deep clone of the array provided.
 *
 * <p>
 * While you can use {@link java.util.Arrays#copyOf} or an array's {@link Object#clone()} to create a
 * copy of a one-dimensional array, those only copy the outermost level. This method also clones each
 * sub-array of a multi-dimensional array, as determined by the declared array type.
 * </p>
 *
 * <p>
 * Notice however that if the base type is an Object type, the base elements themselves won't be cloned.
 * Sub-arrays that are {@code null} remain {@code null} in the clone.
 * </p>
 *
 * @param array the array to deep-clone.
 * @param <T> type of the array.
 *
 * @throws IllegalArgumentException if {@code array} is {@code null} or is not an array.
 *
 * @return a new array; never {@code null}.
 */
public static <T> T deepCloneArray(final T array) {
    if (array == null)
        throw new IllegalArgumentException("the input array cannot be null");
    @SuppressWarnings("unchecked")
    final Class<T> clazz = (Class<T>) array.getClass();
    if (!clazz.isArray())
        throw new IllegalArgumentException("the input is not an array");
    final int dimension = calculateArrayDimensions(clazz);
    return deepCloneArrayUnchecked(array, clazz, dimension);
}

/**
 * Counts the number of array dimensions declared by a class, e.g. 2 for {@code int[][]} and 0 for a
 * non-array class.
 *
 * @param clazz the class to inspect.
 * @return 0 or greater.
 */
private static int calculateArrayDimensions(final Class<?> clazz) {
    int dimensions = 0;
    for (Class<?> current = clazz; current.isArray(); current = current.getComponentType())
        dimensions++;
    return dimensions;
}

/**
 * Recursively clones an array without validating its arguments.
 *
 * @param array the array to clone; assumed to be a non-{@code null} array.
 * @param clazz the array class used to instantiate the clone.
 * @param dimension number of array levels left to clone; at 1 or less the elements are copied as they are.
 * @param <T> type of the array.
 *
 * @return never {@code null}.
 */
@SuppressWarnings("unchecked")
private static <T> T deepCloneArrayUnchecked(final T array, final Class<T> clazz, final int dimension) {
    final int length = Array.getLength(array);
    final Class<Object> componentClass = (Class<Object>) clazz.getComponentType();
    final T result = (T) Array.newInstance(componentClass, length);
    if (dimension <= 1) {
        System.arraycopy(array, 0, result, 0, length);
        return result;
    }
    final int dimensionMinus1 = dimension - 1;
    for (int i = 0; i < length; i++) {
        final Object subArray = Array.get(array, i);
        // A missing sub-array (e.g. a row of "new int[3][]" never assigned) stays null in the clone,
        // which is already the default value of the freshly created result.
        if (subArray != null)
            Array.set(result, i, deepCloneArrayUnchecked(subArray, componentClass, dimensionMinus1));
    }
    return result;
}
}