From 09ac3779d63625f390f42a50be8b5a72e84c7718 Mon Sep 17 00:00:00 2001 From: Valentin Ruano-Rubio Date: Thu, 24 Jul 2014 23:42:53 -0400 Subject: [PATCH] Added ReadLikelihoods component to substitute Map. It uses a more efficient java array[] based implementation and encapsulates operations perform with such a read-likelihood collection such as marginalization, filtering by position, poor modeling or capping worst likelihoods and so forth. Stories: https://www.pivotaltracker.com/story/show/70222086 https://www.pivotaltracker.com/story/show/67961652 --- .../org/broadinstitute/gatk/utils/Utils.java | 255 +++ .../gatk/utils/genotyper/ReadLikelihoods.java | 1502 +++++++++++++++++ 2 files changed, 1757 insertions(+) create mode 100644 public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/genotyper/ReadLikelihoods.java diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/Utils.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/Utils.java index 8fdec27b4..b4c0c75fa 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/Utils.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/Utils.java @@ -34,6 +34,7 @@ import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; import org.broadinstitute.gatk.engine.io.GATKSAMFileWriter; import org.broadinstitute.gatk.utils.text.TextFormattingUtils; +import java.lang.reflect.Array; import java.math.BigInteger; import java.net.InetAddress; import java.security.MessageDigest; @@ -882,4 +883,258 @@ public class Utils { return false; return true; } + + /** + * Skims out positions of an array returning a shorter one with the remaning positions in the same order. + * @param original the original array to splice. + * @param remove for each position in {@code original} indicates whether it should be spliced away ({@code true}), + * or retained ({@code false}) + * + * @param the array type. 
+ * + * @throws IllegalArgumentException if either {@code original} or {@code remove} is {@code null}, + * or {@code remove length is different to {@code original}'s}, or {@code original} is not in + * fact an array. + * + * @return never {@code null}. + */ + public static T skimArray(final T original, final boolean[] remove) { + return skimArray(original,0,null,0,remove,0); + } + + /** + * Skims out positions of an array returning a shorter one with the remaning positions in the same order. + * + *

+     * If the {@code dest} array provided is not long enough a new one will be created and returned with the
+     * same component type. All elements before {@code destOffset} will be copied from the input to the
+     * result array. If {@code dest} is {@code null}, a brand-new array large enough will be created where
+     * the positions preceding {@code destOffset} will be left with the default value. The component type
+     * will match the one of the {@code source} array.
+     *

+ * + * @param source the original array to splice. + * @param sourceOffset the first position to skim. + * @param dest the destination array. + * @param destOffset the first position where to copy the skimed array values. + * @param remove for each position in {@code original} indicates whether it should be spliced away ({@code true}), + * or retained ({@code false}) + * @param removeOffset the first position in the remove index array to consider. + * + * @param the array type. + * + * @throws IllegalArgumentException if either {@code original} or {@code remove} is {@code null}, + * or {@code remove length is different to {@code original}'s}, or {@code original} is not in + * fact an array. + * + * @return never {@code null}. + */ + public static T skimArray(final T source, final int sourceOffset, final T dest, final int destOffset, final boolean[] remove, final int removeOffset) { + if (source == null) + throw new IllegalArgumentException("the source array cannot be null"); + @SuppressWarnings("unchecked") + final Class sourceClazz = (Class) source.getClass(); + + if (!sourceClazz.isArray()) + throw new IllegalArgumentException("the source array is not in fact an array instance"); + final int length = Array.getLength(source) - sourceOffset; + if (length < 0) + throw new IllegalArgumentException("the source offset goes beyond the source array length"); + return skimArray(source,sourceOffset,dest,destOffset,remove,removeOffset,length); + } + + /** + * Skims out positions of an array returning a shorter one with the remaning positions in the same order. + * + *

+     * If the {@code dest} array provided is not long enough a new one will be created and returned with the
+     * same component type. All elements before {@code destOffset} will be copied from the input to the
+     * result array. If {@code dest} is {@code null}, a brand-new array large enough will be created where
+     * the positions preceding {@code destOffset} will be left with the default value. The component type
+     * will match the one of the {@code source} array.
+     *

+ * + * @param source the original array to splice. + * @param sourceOffset the first position to skim. + * @param dest the destination array. + * @param destOffset the first position where to copy the skimed array values. + * @param remove for each position in {@code original} indicates whether it should be spliced away ({@code true}), + * or retained ({@code false}) + * @param removeOffset the first position in the remove index array to consider. + * @param length the total number of position in {@code source} to consider. Thus only the {@code sourceOffset} to + * {@code sourceOffset + length - 1} region will be skimmed. + * + * @param the array type. + * + * @throws IllegalArgumentException if either {@code original} or {@code remove} is {@code null}, + * or {@code remove length is different to {@code original}'s}, or {@code original} is not in + * fact an array. + * + * @return never {@code null}. + */ + public static T skimArray(final T source, final int sourceOffset, final T dest, final int destOffset, + final boolean[] remove, final int removeOffset, final int length) { + if (source == null) + throw new IllegalArgumentException("the source array cannot be null"); + if (remove == null) + throw new IllegalArgumentException("the remove array cannot be null"); + if (sourceOffset < 0) + throw new IllegalArgumentException("the source array offset cannot be negative"); + if (destOffset < 0) + throw new IllegalArgumentException("the destination array offset cannot be negative"); + if (removeOffset < 0) + throw new IllegalArgumentException("the remove array offset cannot be negative"); + if (length < 0) + throw new IllegalArgumentException("the length provided cannot be negative"); + + final int removeLength = Math.min(remove.length - removeOffset,length); + + if (removeLength < 0) + throw new IllegalArgumentException("the remove offset provided falls beyond the remove array end"); + + + @SuppressWarnings("unchecked") + final Class sourceClazz = (Class) 
source.getClass(); + + if (!sourceClazz.isArray()) + throw new IllegalArgumentException("the source array is not in fact an array instance"); + + final Class destClazz = skimArrayDetermineDestArrayClass(dest, sourceClazz); + + final int sourceLength = Array.getLength(source); + + if (sourceLength < length + sourceOffset) + throw new IllegalArgumentException("the source array is too small considering length and offset"); + + // count how many positions are to be removed. + + int removeCount = 0; + + final int removeEnd = removeLength + removeOffset; + for (int i = removeOffset; i < removeEnd; i++) + if (remove[i]) removeCount++; + + + final int newLength = length - removeCount; + + + @SuppressWarnings("unchecked") + final T result = skimArrayBuildResultArray(dest, destOffset, destClazz, newLength); + // No removals, just copy the whole thing. + + if (removeCount == 0) + System.arraycopy(source,sourceOffset,result,destOffset,length); + else if (length > 0) { // if length == 0 nothing to do. + int nextOriginalIndex = 0; + int nextNewIndex = 0; + int nextRemoveIndex = removeOffset; + while (nextOriginalIndex < length && nextNewIndex < newLength) { + while (nextRemoveIndex < removeEnd && remove[nextRemoveIndex++]) { nextOriginalIndex++; } // skip positions to be spliced. + // Since we make the nextNewIndex < newLength check in the while condition + // there is no need to include the following break, as is guaranteed not to be true: + // if (nextOriginalIndex >= length) break; // we reach the final (last positions are to be spliced. 
+ final int copyStart = nextOriginalIndex; + while (++nextOriginalIndex < length && (nextRemoveIndex >= removeEnd || !remove[nextRemoveIndex])) { nextRemoveIndex++; } + final int copyEnd = nextOriginalIndex; + final int copyLength = copyEnd - copyStart; + System.arraycopy(source, sourceOffset + copyStart, result, destOffset + nextNewIndex, copyLength); + nextNewIndex += copyLength; + } + } + return result; + } + + private static T skimArrayBuildResultArray(final T dest, final int destOffset, final Class destClazz, final int newLength) { + @SuppressWarnings("unchecked") + final T result; + + if (dest == null) + result = (T) Array.newInstance(destClazz.getComponentType(), newLength + destOffset); + else if (Array.getLength(dest) < newLength + destOffset) { + result = (T) Array.newInstance(destClazz.getComponentType(),newLength + destOffset); + if (destOffset > 0) System.arraycopy(dest,0,result,0,destOffset); + } else + result = dest; + return result; + } + + private static Class skimArrayDetermineDestArrayClass(final T dest, Class sourceClazz) { + final Class destClazz; + if (dest == null) + destClazz = sourceClazz; + else { + destClazz = (Class) dest.getClass(); + if (destClazz != sourceClazz) { + if (!destClazz.isArray()) + throw new IllegalArgumentException("the destination array class must be an array"); + if (sourceClazz.getComponentType().isAssignableFrom(destClazz.getComponentType())) + throw new IllegalArgumentException("the provided destination array class cannot contain values from the source due to type incompatibility"); + } + } + return destClazz; + } + + /** + * Makes a deep clone of the array provided. + * + *

+     * Unlike {@link Arrays#copyOf} or an array {@link Object#clone()}, which create a shallow copy,
+     * if it is multi-dimensional each sub-array or matrix will be cloned as well.
+     *

+ * + *

+     * Notice, however, that if the base type is an Object type, the base elements themselves won't be cloned.
+     *

+ * + * @param array the array to deep-clone. + * @param type of the array. + * + * @throws IllegalArgumentException if {@code array} is {@code null} or is not an array. + */ + public static T deepCloneArray(final T array) { + + if (array == null) + throw new IllegalArgumentException(""); + + @SuppressWarnings("unchecked") + final Class clazz = (Class) array.getClass(); + + + if (!clazz.isArray()) + throw new IllegalArgumentException("the input is not an array"); + + final int dimension = calculateArrayDimensions(clazz); + + return deepCloneArrayUnchecked(array,clazz, dimension); + } + + private static int calculateArrayDimensions(final Class clazz) { + if (clazz.isArray()) + return calculateArrayDimensions(clazz.getComponentType()) + 1; + else + return 0; + } + + private static T deepCloneArrayUnchecked(final T array, final Class clazz, final int dimension) { + + + final int length = Array.getLength(array); + + final Class componentClass = clazz.getComponentType(); + + final T result = (T) Array.newInstance(componentClass,length); + + if (dimension <= 1) { + System.arraycopy(array, 0, result, 0, length); + return result; + } + + + final int dimensionMinus1 = dimension - 1; + + for (int i = 0; i < length; i++) + Array.set(result,i,deepCloneArrayUnchecked(Array.get(array,i),componentClass,dimensionMinus1)); + + return result; + } } diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/genotyper/ReadLikelihoods.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/genotyper/ReadLikelihoods.java new file mode 100644 index 000000000..f03dfbe78 --- /dev/null +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/genotyper/ReadLikelihoods.java @@ -0,0 +1,1502 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* 
restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.utils.genotyper; + +import htsjdk.variant.variantcontext.Allele; +import it.unimi.dsi.fastutil.ints.IntArrayList; +import it.unimi.dsi.fastutil.objects.Object2IntMap; +import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; +import org.broadinstitute.gatk.engine.downsampling.AlleleBiasedDownsamplingUtils; +import org.broadinstitute.gatk.utils.GenomeLoc; +import org.broadinstitute.gatk.utils.Utils; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; +import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; + +import java.util.*; + +/** + * Read-likelihoods container implementation based on integer indexed arrays. + * + * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> + */ +public class ReadLikelihoods implements Cloneable { + + /** + * Reads by sample index. Each sub array contains reference to the reads of the ith sample. + */ + private GATKSAMRecord[][] readsBySampleIndex; + + /** + * Indexed per sample, allele and finally read (within sample). + *

+ * valuesBySampleIndex[s][a][r] == lnLk(R_r | A_a) where R_r comes from Sample s. + *

+ */ + private double[][][] valuesBySampleIndex; + + /** + * Sorted list samples. + */ + private final String[] samples; + + /** + * Unmodifiable list version of the samples + */ + private List sampleList; + + /** + * Unmodifiable list version of the alleles + */ + private List
alleleList; + + /** + * Sorted array of alleles. + */ + private A[] alleles; + + /** + * Maps from sample name to its index. + */ + private final Object2IntMap sampleIndex; + + /** + * Maps from allele to its index. + */ + private final Object2IntMap alleleIndex; + + /** + * Maps from each read to its index within the sample. + */ + private final Object2IntMap[] readIndexBySampleIndex; + + /** + * Index of the reference allele if any, otherwise -1. + */ + private final int referenceAlleleIndex; + + /** + * Caches the read-list per sample list returned by {@link #sampleReads} + */ + private final List[] readListBySampleIndex; + + /** + * Sample matrices. + */ + private final Matrix[] sampleMatrices; + + /** + * Constructs a new read-likelihood collection. + * + *

+ * The initial likelihoods for all allele-read combinations are + * 0. + *

+ * + * @param samples all supported samples in the collection. + * @param alleles all supported alleles in the collection. + * @param reads reads stratified per sample. + * + * @throws IllegalArgumentException if any of {@code allele}, {@code samples} + * or {@code reads} is {@code null}, + * or if they contain null values. + */ + @SuppressWarnings("unchecked") + public ReadLikelihoods(final List samples, final List
alleles, + final Map> reads) { + if (alleles == null) + throw new IllegalArgumentException("allele list cannot be null"); + if (samples == null) + throw new IllegalArgumentException("sample list cannot be null"); + if (reads == null) + throw new IllegalArgumentException("read map cannot be null"); + + final int sampleCount = samples.size(); + final int alleleCount = alleles.size(); + this.alleles = alleles.toArray((A[]) new Allele[alleleCount]); + this.samples = samples.toArray(new String[sampleCount]); + readsBySampleIndex = new GATKSAMRecord[sampleCount][]; + readListBySampleIndex = new List[sampleCount]; + valuesBySampleIndex = new double[sampleCount][][]; + referenceAlleleIndex = findReferenceAllele(this.alleles); + + sampleIndex = new Object2IntOpenHashMap<>(sampleCount); + alleleIndex = new Object2IntOpenHashMap<>(alleleCount); + readIndexBySampleIndex = new Object2IntMap[sampleCount]; + + setupIndexes(reads, sampleCount, alleleCount); + + sampleMatrices = (Matrix[]) new Matrix[sampleCount]; + } + + // Add all the indices to alleles, sample and reads in the look-up maps. + private void setupIndexes(final Map> reads, final int sampleCount, final int alleleCount) { + alleleIndex.clear(); + for (int i = 0; i < alleleCount; i++) { + final A allele = alleles[i]; + if (allele == null) + throw new IllegalArgumentException("no allele can be null"); + alleleIndex.put(allele, i); + } + if (alleleIndex.size() != alleleCount) + throw new IllegalArgumentException("repeated alleles"); + + sampleIndex.clear(); + for (int i = 0; i < sampleCount; i++) { + setupSampleData(i, reads, alleleCount); + } + + if (sampleIndex.size() != sampleCount) + throw new IllegalArgumentException("repeated sample names"); + } + + // Assumes that {@link #samples} has been initialized with the sample names. 
+ private void setupSampleData(final int sampleIndex, final Map> readsBySample, + final int alleleCount) { + final String sample = this.samples[sampleIndex]; + if (sample == null) + throw new IllegalArgumentException("no sample can be null"); + this.sampleIndex.put(sample, sampleIndex); + final List reads = readsBySample.get(sample); + readsBySampleIndex[sampleIndex] = reads == null + ? new GATKSAMRecord[0] + : reads.toArray(new GATKSAMRecord[reads.size()]); + final int sampleReadCount = readsBySampleIndex[sampleIndex].length; + + readIndexBySampleIndex[sampleIndex] = new Object2IntOpenHashMap<>(sampleReadCount); + for (int r = 0; r < sampleReadCount; r++) { + final GATKSAMRecord read = readsBySampleIndex[sampleIndex][r]; + if (read == null) + throw new IllegalArgumentException("read cannot be null"); + if (readIndexBySampleIndex[sampleIndex].containsKey(read)) + throw new IllegalArgumentException("repeated read " + read); + readIndexBySampleIndex[sampleIndex].put(readsBySampleIndex[sampleIndex][r], r); + } + + final double[][] sampleValues = new double[alleleCount][sampleReadCount]; + valuesBySampleIndex[sampleIndex] = sampleValues; + } + + /** + * Create an independent copy of this read-likelihoods collection + */ + public ReadLikelihoods clone() { + + final int sampleCount = samples.length; + final int alleleCount = alleles.length; + + final double[][][] newLikelihoodValues = new double[sampleCount][alleleCount][]; + + @SuppressWarnings("unchecked") + final Object2IntMap[] newReadIndexBySampleIndex = new Object2IntMap[sampleCount]; + final GATKSAMRecord[][] newReadsBySampleIndex = new GATKSAMRecord[sampleCount][]; + + for (int s = 0; s < sampleCount; s++) { + newReadIndexBySampleIndex[s] = new Object2IntOpenHashMap<>(readIndexBySampleIndex[s]); + newReadsBySampleIndex[s] = readsBySampleIndex[s].clone(); + for (int a = 0; a < alleleCount; a++) + newLikelihoodValues[s][a] = valuesBySampleIndex[s][a].clone(); + } + + // Finally we create the new read-likelihood + 
return new ReadLikelihoods<>(alleles.clone(), samples, sampleIndex, + newReadsBySampleIndex, + newReadIndexBySampleIndex, newLikelihoodValues); + } + + // Internally used constructor. + @SuppressWarnings("unchecked") + private ReadLikelihoods(final A[] alleles, final String[] samples, final Object2IntMap sampleIndex, + final GATKSAMRecord[][] readsBySampleIndex, final Object2IntMap[] readIndex, + final double[][][] values) { + this.samples = samples; + this.alleles = alleles; + this.readsBySampleIndex = readsBySampleIndex; + this.valuesBySampleIndex = values; + this.readIndexBySampleIndex = readIndex; + this.readListBySampleIndex = new List[samples.length]; + + this.sampleIndex = sampleIndex; + + this.alleleIndex = new Object2IntOpenHashMap<>(alleles.length); + for (int i = 0; i < alleles.length; i++) + alleleIndex.put(alleles[i],i); + + if (alleleIndex.size() != alleles.length) + throw new IllegalArgumentException("the allele index is null"); + + referenceAlleleIndex = findReferenceAllele(alleles); + sampleMatrices = (Matrix[]) new Matrix[samples.length]; + } + + + // Search for the reference allele, if not found the index is -1. + private int findReferenceAllele(final A[] alleles) { + for (int i = 0; i < alleles.length; i++) + if (alleles[i].isReference()) + return i; + return -1; + } + + /** + * Checks whether a sample is included likelihood collection. + * + * @param sample the query sample. + * + * @throws IllegalArgumentException if {@code sample} is {@code null}. + * @return {@code true} iff the sample is in the likelihood collection. + */ + @SuppressWarnings("unused") + public boolean containsSample(final String sample) { + return sampleIndex.containsKey(sample); + } + + /** + * Returns the index of a sample within the likelihood collection. + * + * @param sample the query sample. + * + * @throws IllegalArgumentException if {@code sample} is {@code null}. + * @return -1 if the allele is not included, 0 or greater otherwise. 
+ */ + public int sampleIndex(final String sample) { + if (sample == null) + throw new IllegalArgumentException("the input allele cannot be null"); + if (sampleIndex.containsKey(sample)) + return sampleIndex.getInt(sample); + else + return -1; + } + + /** + * Number of samples included in the likelihood collection. + * @return 0 or greater. + */ + public int sampleCount() { + return samples.length; + } + + /** + * Returns sample name given its index. + * + * @param sampleIndex query index. + * + * @throws IllegalArgumentException if {@code sampleIndex} is negative. + * + * @return never {@code null}. + */ + public String sample(final int sampleIndex) { + checkSampleIndex(sampleIndex); + return samples[sampleIndex]; + } + + /** + * Checks whether the allele is within the likelihood collection. + * + * @param allele the query allele. + * + * @throws IllegalArgumentException if {@code allele} is {@code null}. + * @return {@code true} iff the allele is in the likelihood collection. + */ + @SuppressWarnings("unused") + public boolean containsAllele(final A allele) { + return alleleIndex.containsKey(allele); + } + + /** + * Returns the index of an allele within the likelihood collection. + * + * @param allele the query allele. + * + * @throws IllegalArgumentException if {@code allele} is {@code null}. + * + * @return -1 if the allele is not included, 0 or greater otherwise. + */ + public int alleleIndex(final A allele) { + if (allele == null) + throw new IllegalArgumentException("the input allele cannot be null"); + if (alleleIndex.containsKey(allele)) + return alleleIndex.getInt(allele); + else + return -1; + } + + /** + * Returns number of alleles in the collection. + * @return 0 or greater. + */ + @SuppressWarnings("unused") + public int alleleCount() { + return alleles.length; + } + + /** + * Returns the allele given its index. + * + * @param alleleIndex the allele index. + * + * @throws IllegalArgumentException the allele index is {@code null}. 
+ * + * @return never {@code null}. + */ + public A allele(final int alleleIndex) { + checkAlleleIndex(alleleIndex); + return alleles[alleleIndex]; + } + + /** + * Returns the reads that belong to a sample sorted by their index (within that sample). + * + * @param sampleIndex the requested sample. + * @return never {@code null} but perhaps a zero-length array if there is no reads in sample. No element in + * the array will be null. + */ + public List sampleReads(final int sampleIndex) { + checkSampleIndex(sampleIndex); + final List extantList = readListBySampleIndex[sampleIndex]; + if (extantList == null) + return readListBySampleIndex[sampleIndex] = Collections.unmodifiableList(Arrays.asList(readsBySampleIndex[sampleIndex])); + else + return extantList; + } + + /** + * Returns the likelihood matrix for a sample. + *

+     * The matrix is indexed first by allele and then by read index.
+
+     *

+     *          result[a][r] == lnLk(Read_r | Allele_a)
+     *     
+ *

+ * + *

+     * The matrix is live: changes to it update the likelihoods in the collection, so please use it with care.
+     *

+ * + * @param sampleIndex the sample index. + * + * @return never {@code null}. + */ + /* package */ double[][] sampleValues(final int sampleIndex) { + checkSampleIndex(sampleIndex); + return valuesBySampleIndex[sampleIndex]; + } + + + /** + * Returns a read vs allele likelihood matrix corresponding to a sample. + * + * @param sampleIndex target sample. + * + * @throws IllegalArgumentException if {@code sampleIndex} is not null. + * + * @return never {@code null} + */ + public Matrix
sampleMatrix(final int sampleIndex) { + checkSampleIndex(sampleIndex); + final Matrix extantResult = sampleMatrices[sampleIndex]; + if (extantResult != null) + return extantResult; + else + return sampleMatrices[sampleIndex] = new SampleMatrix(sampleIndex); + } + + /** + * Adjusts likelihoods so that for each read, the best allele likelihood is 0 and caps the minimum likelihood + * of any allele for each read based on the maximum alternative allele likelihood. + * + * @param bestToZero set the best likelihood to 0, others will be subtracted the same amount. + * @param maximumLikelihoodDifferenceCap maximum difference between the best alternative allele likelihood + * and any other likelihood. + * + * @throws IllegalArgumentException if {@code maximumDifferenceWithBestAlternative} is not 0 or less. + */ + public void normalizeLikelihoods(final boolean bestToZero, final double maximumLikelihoodDifferenceCap) { + if (maximumLikelihoodDifferenceCap >= 0.0 || Double.isNaN(maximumLikelihoodDifferenceCap)) + throw new IllegalArgumentException("the minimum reference likelihood fall cannot be positive"); + + if (maximumLikelihoodDifferenceCap == Double.NEGATIVE_INFINITY && !bestToZero) + return; + + final int alleleCount = alleles.length; + if (alleleCount == 0) // trivial case there is no alleles. + return; + else if (alleleCount == 1 && !bestToZero) + return; + + for (int s = 0; s < valuesBySampleIndex.length; s++) { + final double[][] sampleValues = valuesBySampleIndex[s]; + final int readCount = readsBySampleIndex[s].length; + for (int r = 0; r < readCount; r++) + normalizeLikelihoodsPerRead(bestToZero, maximumLikelihoodDifferenceCap, sampleValues, s, r); + } + } + + // Does the normalizeLikelihoods job for each read. 
+ private void normalizeLikelihoodsPerRead(final boolean bestToZero, final double maximumBestAltLikelihoodDifference, + final double[][] sampleValues, final int sampleIndex, final int readIndex) { + + final BestAllele bestAlternativeAllele = searchBestAllele(sampleIndex,readIndex,false); + + final double worstLikelihoodCap = bestAlternativeAllele.likelihood + maximumBestAltLikelihoodDifference; + + final double referenceLikelihood = referenceAlleleIndex == -1 ? Double.NEGATIVE_INFINITY : + sampleValues[referenceAlleleIndex][readIndex]; + + + final double bestAbsoluteLikelihood = Math.max(bestAlternativeAllele.likelihood,referenceLikelihood); + + if (bestToZero) { + if (bestAbsoluteLikelihood == Double.NEGATIVE_INFINITY) + for (int a = 0; a < alleles.length; a++) + sampleValues[a][readIndex] = 0; + else if (worstLikelihoodCap != Double.NEGATIVE_INFINITY) + for (int a = 0; a < alleles.length; a++) + sampleValues[a][readIndex] = (sampleValues[a][readIndex] < worstLikelihoodCap ? worstLikelihoodCap : sampleValues[a][readIndex]) - bestAbsoluteLikelihood; + else + for (int a = 0; a < alleles.length; a++) + sampleValues[a][readIndex] -= bestAbsoluteLikelihood; + } else // else if (maximumReferenceLikelihoodFall != Double.NEGATIVE_INFINITY ) { // + // Guarantee to be the case by enclosing code. + for (int a = 0; a < alleles.length; a++) + if (sampleValues[a][readIndex] < worstLikelihoodCap) + sampleValues[a][readIndex] = worstLikelihoodCap; + } + + /** + * Returns the samples in this read-likelihood collection. + *

+ * Samples are sorted by their index in the collection. + *

+ * + *

+ * The returned list is an unmodifiable view on the read-likelihoods sample list. + *

+ * + * @return never {@code null}. + */ + public List samples() { + if (sampleList == null) + sampleList = Collections.unmodifiableList(Arrays.asList(samples)); + return sampleList; + } + + /** + * Returns the samples in this read-likelihood collection. + *

+     * Alleles are sorted by their index in the collection.
+     *

+ * + *

+     * The returned list is unmodifiable. It will not be updated if the collection
+     * allele list changes.
+     *

+ * + * @return never {@code null}. + */ + public List
alleles() { + if (alleleList == null) + alleleList = Collections.unmodifiableList(Arrays.asList(alleles)); + return alleleList; + } + + /** + * Search the best allele for a read. + * + * @param sampleIndex including sample index. + * @param readIndex target read index. + * + * @return never {@code null}, but with {@link BestAllele#allele allele} == {@code null} + * if non-could be found. + */ + private BestAllele searchBestAllele(final int sampleIndex, final int readIndex, final boolean canBeReference) { + final int alleleCount = alleles.length; + if (alleleCount == 0 || (alleleCount == 1 && referenceAlleleIndex == 0 && !canBeReference)) + return new BestAllele(sampleIndex,readIndex,-1,Double.NEGATIVE_INFINITY,Double.NEGATIVE_INFINITY); + + final double[][] sampleValues = valuesBySampleIndex[sampleIndex]; + int bestAlleleIndex = canBeReference || referenceAlleleIndex != 0 ? 0 : 1; + + double bestLikelihood = sampleValues[bestAlleleIndex][readIndex]; + double secondBestLikelihood = Double.NEGATIVE_INFINITY; + for (int a = bestAlleleIndex + 1; a < alleleCount; a++) { + if (!canBeReference && referenceAlleleIndex == a) + continue; + final double candidateLikelihood = sampleValues[a][readIndex]; + if (candidateLikelihood > bestLikelihood) { + bestAlleleIndex = a; + secondBestLikelihood = bestLikelihood; + bestLikelihood = candidateLikelihood; + } else if (candidateLikelihood > secondBestLikelihood) { + secondBestLikelihood = candidateLikelihood; + } + } + return new BestAllele(sampleIndex,readIndex,bestAlleleIndex,bestLikelihood,secondBestLikelihood); + } + + public void changeReads(final Map readRealignments) { + final int sampleCount = samples.length; + for (int s = 0; s < sampleCount; s++) { + final GATKSAMRecord[] sampleReads = readsBySampleIndex[s]; + final Object2IntMap readIndex = readIndexBySampleIndex[s]; + final int sampleReadCount = sampleReads.length; + for (int r = 0; r < sampleReadCount; r++) { + final GATKSAMRecord read = sampleReads[r]; + final 
GATKSAMRecord replacement = readRealignments.get(read); + if (replacement == null) + continue; + readIndex.put(sampleReads[r] = replacement,r); + readIndex.remove(read); + } + } + } + + /** + * Likelihood matrix between a set of alleles and reads. + * @param the allele-type. + */ + public interface Matrix { + + /** + * List of reads in the matrix sorted by their index therein. + * @return never {@code null}. + */ + public List reads(); + + /** + * List of alleles in the matrix sorted by their index in the collection. + * @return never {@code null}. + */ + public List alleles(); + + /** + * Set the likelihood of a read given an allele through their indices. + * + * @param alleleIndex the target allele index. + * @param readIndex the target read index. + * @param value new likelihood value for the target read give the target allele. + * + * @throws IllegalArgumentException if {@code alleleIndex} or {@code readIndex} + * are not valid allele and read indices respectively. + */ + public void set(final int alleleIndex, final int readIndex, final double value); + + /** + * Returns the likelihood of a read given a haplotype. + * + * @param alleleIndex the index of the given haplotype. + * @param readIndex the index of the target read. + * + * @throws IllegalArgumentException if {@code alleleIndex} or {@code readIndex} is not a + * valid allele or read index respectively. + * + * @return the requested likelihood, whatever value was provided using {@link #set(int,int,double) set} + * or 0.0 if none was set. + */ + public double get(final int alleleIndex, final int readIndex); + + /** + * Queries the index of an allele in the matrix. + * + * @param allele the target allele. + * + * @throws IllegalArgumentException if {@code allele} is {@code null}. + * @return -1 if such allele does not exist, otherwise its index which 0 or greater. + */ + @SuppressWarnings("unused") + public int alleleIndex(final A allele); + + /** + * Queries the index of a read in the matrix. 
+ * + * @param read the target read. + * + * @throws IllegalArgumentException if {@code read} is {@code null}. + * + * @return -1 if there is not such a read in the matrix, otherwise its index + * which is 0 or greater. + */ + @SuppressWarnings("unused") + public int readIndex(final GATKSAMRecord read); + + /** + * Number of allele in the matrix. + * @return never negative. + */ + public int alleleCount(); + + /** + * Number of reads in the matrix. + * @return never negative. + */ + public int readCount(); + + /** + * Returns the allele given its index. + * + * @param alleleIndex the target allele index. + * + * @throws IllegalArgumentException if {@code alleleIndex} is not a valid allele index. + * @return never {@code null}. + */ + public A allele(final int alleleIndex); + + /** + * Returns the allele given its index. + * + * @param readIndex the target allele index. + * + * @throws IllegalArgumentException if {@code readIndex} is not a valid read index. + * @return never {@code null}. + */ + public GATKSAMRecord read(final int readIndex); + } + + /** + * Perform marginalization from an allele set to another (smaller one) taking the maximum value + * for each read in the original allele subset. + * + * @param newToOldAlleleMap map where the keys are the new alleles and the value list the original + * alleles that correspond to the new one. + * @return never {@code null}. The result will have the requested set of new alleles (keys in {@code newToOldAlleleMap}, and + * the same set of samples and reads as the original. + * + * @throws IllegalArgumentException is {@code newToOldAlleleMap} is {@code null} or contains {@code null} values, + * or its values contain reference to non-existing alleles in this read-likelihood collection. Also no new allele + * can have zero old alleles mapping nor two new alleles can make reference to the same old allele. 
+ */ + public ReadLikelihoods marginalize(final Map> newToOldAlleleMap) { + + if (newToOldAlleleMap == null) + throw new IllegalArgumentException("the input allele mapping cannot be null"); + + @SuppressWarnings("unchecked") + final B[] newAlleles = newToOldAlleleMap.keySet().toArray((B[]) new Allele[newToOldAlleleMap.size()]); + final int oldAlleleCount = alleles.length; + final int newAlleleCount = newAlleles.length; + + // we get the index correspondence between new old -> new allele, -1 entries mean that the old + // allele does not map to any new; supported but typically not the case. + final int[] oldToNewAlleleIndexMap = oldToNewAlleleIndexMap(newToOldAlleleMap, newAlleles, oldAlleleCount, newAlleleCount); + + // We calculate the marginal likelihoods. + + final double[][][] newLikelihoodValues = marginalLikelihoods(oldAlleleCount, newAlleleCount, oldToNewAlleleIndexMap, null); + + final int sampleCount = samples.length; + + @SuppressWarnings("unchecked") + final Object2IntMap[] newReadIndexBySampleIndex = new Object2IntMap[sampleCount]; + final GATKSAMRecord[][] newReadsBySampleIndex = new GATKSAMRecord[sampleCount][]; + + for (int s = 0; s < sampleCount; s++) { + newReadIndexBySampleIndex[s] = new Object2IntOpenHashMap<>(readIndexBySampleIndex[s]); + newReadsBySampleIndex[s] = readsBySampleIndex[s].clone(); + } + + // Finally we create the new read-likelihood + return new ReadLikelihoods<>(newAlleles, samples, sampleIndex, + newReadsBySampleIndex, + newReadIndexBySampleIndex, newLikelihoodValues); + } + + + /** + * Perform marginalization from an allele set to another (smaller one) taking the maximum value + * for each read in the original allele subset. + * + * @param newToOldAlleleMap map where the keys are the new alleles and the value list the original + * alleles that correspond to the new one. + * @return never {@code null}. 
The result will have the requested set of new alleles (keys in {@code newToOldAlleleMap}, and + * the same set of samples and reads as the original. + * + * @param overlap if not {@code null}, only reads that overlap the location (with unclipping) will be present in + * the output read-collection. + * + * @throws IllegalArgumentException is {@code newToOldAlleleMap} is {@code null} or contains {@code null} values, + * or its values contain reference to non-existing alleles in this read-likelihood collection. Also no new allele + * can have zero old alleles mapping nor two new alleles can make reference to the same old allele. + */ + public ReadLikelihoods marginalize(final Map> newToOldAlleleMap,final GenomeLoc overlap) { + + if (overlap == null) + return marginalize(newToOldAlleleMap); + + if (newToOldAlleleMap == null) + throw new IllegalArgumentException("the input allele mapping cannot be null"); + + @SuppressWarnings("unchecked") + final B[] newAlleles = newToOldAlleleMap.keySet().toArray((B[]) new Allele[newToOldAlleleMap.size()]); + final int oldAlleleCount = alleles.length; + final int newAlleleCount = newAlleles.length; + + // we get the index correspondence between new old -> new allele, -1 entries mean that the old + // allele does not map to any new; supported but typically not the case. + final int[] oldToNewAlleleIndexMap = oldToNewAlleleIndexMap(newToOldAlleleMap, newAlleles, oldAlleleCount, newAlleleCount); + + final int[][] readsToKeep = overlappingReadIndicesBySampleIndex(overlap); + // We calculate the marginal likelihoods. 
+ + final double[][][] newLikelihoodValues = marginalLikelihoods(oldAlleleCount, newAlleleCount, oldToNewAlleleIndexMap, readsToKeep); + + final int sampleCount = samples.length; + + @SuppressWarnings("unchecked") + final Object2IntMap[] newReadIndexBySampleIndex = new Object2IntMap[sampleCount]; + final GATKSAMRecord[][] newReadsBySampleIndex = new GATKSAMRecord[sampleCount][]; + + for (int s = 0; s < sampleCount; s++) { + final int[] sampleReadsToKeep = readsToKeep[s]; + final GATKSAMRecord[] oldSampleReads = readsBySampleIndex[s]; + final int oldSampleReadCount = oldSampleReads.length; + final int newSampleReadCount = sampleReadsToKeep.length; + if (newSampleReadCount == oldSampleReadCount) { + newReadIndexBySampleIndex[s] = new Object2IntOpenHashMap<>(readIndexBySampleIndex[s]); + newReadsBySampleIndex[s] = readsBySampleIndex[s].clone(); + } else { + final Object2IntMap newSampleReadIndex = newReadIndexBySampleIndex[s] = new Object2IntOpenHashMap<>(newSampleReadCount); + final GATKSAMRecord[] newSampleReads = newReadsBySampleIndex[s] = new GATKSAMRecord[newSampleReadCount]; + for (int r = 0; r < newSampleReadCount; r++) + newSampleReadIndex.put(newSampleReads[r] = oldSampleReads[sampleReadsToKeep[r]],r); + } + } + + // Finally we create the new read-likelihood + return new ReadLikelihoods<>(newAlleles, samples, sampleIndex, + newReadsBySampleIndex, + newReadIndexBySampleIndex, newLikelihoodValues); + } + + private int[][] overlappingReadIndicesBySampleIndex(final GenomeLoc overlap) { + if (overlap == null) + return null; + final int sampleCount = samples.length; + final int[][] result = new int[sampleCount][]; + final IntArrayList buffer = new IntArrayList(200); + final int referenceIndex = overlap.getContigIndex(); + final int overlapStart = overlap.getStart(); + final int overlapEnd = overlap.getStop(); + for (int s = 0; s < sampleCount; s++) { + buffer.clear(); + final GATKSAMRecord[] sampleReads = readsBySampleIndex[s]; + final int sampleReadCount = 
sampleReads.length; + buffer.ensureCapacity(sampleReadCount); + for (int r = 0; r < sampleReadCount; r++) + if (unclippedReadOverlapsRegion(sampleReads[r], referenceIndex, overlapStart, overlapEnd)) + buffer.add(r); + result[s] = buffer.toIntArray(); + } + return result; + } + + public static boolean unclippedReadOverlapsRegion(final GATKSAMRecord read, final GenomeLoc region) { + return unclippedReadOverlapsRegion(read, region.getContigIndex(), region.getStart(), region.getStop()); + } + + private static boolean unclippedReadOverlapsRegion(final GATKSAMRecord sampleRead, final int referenceIndex, final int start, final int end) { + final int readReference = sampleRead.getReferenceIndex(); + if (readReference != referenceIndex) + return false; + + final int readStart = sampleRead.getUnclippedStart(); + if (readStart > end) + return false; + + final int readEnd = sampleRead.getReadUnmappedFlag() ? sampleRead.getUnclippedEnd() + : Math.max(sampleRead.getUnclippedEnd(), sampleRead.getUnclippedStart()); + return readEnd >= start; + } + + + + // Calculate the marginal likelihoods considering the old -> new allele index mapping. + private double[][][] marginalLikelihoods(final int oldAlleleCount, final int newAlleleCount, final int[] oldToNewAlleleIndexMap, final int[][] readsToKeep) { + + final int sampleCount = samples.length; + final double[][][] result = new double[sampleCount][][]; + + for (int s = 0; s < sampleCount; s++) { + final int sampleReadCount = readsBySampleIndex[s].length; + final double[][] oldSampleValues = valuesBySampleIndex[s]; + final int[] sampleReadToKeep = readsToKeep == null || readsToKeep[s].length == sampleReadCount ? null : readsToKeep[s]; + final int newSampleReadCount = sampleReadToKeep == null ? sampleReadCount : sampleReadToKeep.length; + final double[][] newSampleValues = result[s] = new double[newAlleleCount][newSampleReadCount]; + // We initiate all likelihoods to -Inf. 
+ for (int a = 0; a < newAlleleCount; a++) + Arrays.fill(newSampleValues[a], Double.NEGATIVE_INFINITY); + // For each old allele and read we update the new table keeping the maximum likelihood. + for (int r = 0; r < newSampleReadCount; r++) { + for (int a = 0; a < oldAlleleCount; a++) { + final int oldReadIndex = newSampleReadCount == sampleReadCount ? r : sampleReadToKeep[r]; + final int newAlleleIndex = oldToNewAlleleIndexMap[a]; + if (newAlleleIndex == -1) + continue; + final double likelihood = oldSampleValues[a][oldReadIndex]; + if (likelihood > newSampleValues[newAlleleIndex][r]) + newSampleValues[newAlleleIndex][r] = likelihood; + } + } + } + return result; + } + + // calculates an old to new allele index map array. + private int[] oldToNewAlleleIndexMap(final Map> newToOldAlleleMap, final B[] newAlleles, + final int oldAlleleCount, final int newAlleleCount) { + + final int[] oldToNewAlleleIndexMap = new int[oldAlleleCount]; + Arrays.fill(oldToNewAlleleIndexMap, -1); // -1 indicate that there is no new allele that make reference to that old one. 
+ + for (int i = 0; i < newAlleleCount; i++) { + final B newAllele = newAlleles[i]; + if (newAllele == null) + throw new IllegalArgumentException("input alleles cannot be null"); + final List oldAlleles = newToOldAlleleMap.get(newAllele); + if (oldAlleles == null) + throw new IllegalArgumentException("no new allele list can be null"); + for (final A oldAllele : oldAlleles) { + if (oldAllele == null) + throw new IllegalArgumentException("old alleles cannot be null"); + final int oldAlleleIndex = alleleIndex(oldAllele); + if (oldAlleleIndex == -1) + throw new IllegalArgumentException("missing old allele " + oldAllele + " in likelihood collection "); + if (oldToNewAlleleIndexMap[oldAlleleIndex] != -1) + throw new IllegalArgumentException("collision: two new alleles make reference to the same old allele"); + oldToNewAlleleIndexMap[oldAlleleIndex] = i; + } + } + return oldToNewAlleleIndexMap; + } + + /** + * Remove those reads that do not overlap certain genomic location. + * + *

+ * This method modifies the current read-likelihoods collection. + *

+ * + * @param location the target location. + * + * @throws IllegalArgumentException the location cannot be {@code null} nor unmapped. + */ + @SuppressWarnings("unused") + public void filterToOnlyOverlappingUnclippedReads(final GenomeLoc location) { + if (location == null) + throw new IllegalArgumentException("the location cannot be null"); + if (location.isUnmapped()) + throw new IllegalArgumentException("the location cannot be unmapped"); + + final int sampleCount = samples.length; + + final int locContig = location.getContigIndex(); + final int locStart = location.getStart(); + final int locEnd = location.getStop(); + + final Set readsToRemove = new HashSet<>(100); // can we improve this estimate? + for (int s = 0; s < sampleCount; s++) + for (final GATKSAMRecord read : readsBySampleIndex[s]) + if (!unclippedReadOverlapsRegion(read, locContig, locStart, locEnd)) + readsToRemove.add(read); + removeReads(readsToRemove); + } + + // Compare the read coordinates to the location of interest. + private boolean readOverlapsLocation(final String contig, final int locStart, + final int locEnd, final GATKSAMRecord read) { + final boolean overlaps; + + if (read.getReadUnmappedFlag()) + overlaps = false; + else if (!read.getReferenceName().equals(contig)) + overlaps = false; + else { + int alnStart = read.getAlignmentStart(); + int alnStop = read.getAlignmentEnd(); + if (alnStart > alnStop) { // Paranoia? based on GLP.createGenomeLoc(Read) this can happen?. + final int end = alnStart; + alnStart = alnStop; + alnStop = end; + } + overlaps = !(alnStop < locStart || alnStart > locEnd); + } + return overlaps; + } + + /** + * Removes those read that the best possible likelihood given any allele is just too low. + * + *

+ * This is determined by a maximum error per read-base against the best likelihood possible. + *

+ * + * @param maximumErrorPerBase the minimum acceptable error rate per read base, must be + * a positive number. + * + * @throws IllegalStateException is not supported for read-likelihood that do not contain alleles. + * + * @throws IllegalArgumentException if {@code maximumErrorPerBase} is negative. + */ + public void filterPoorlyModeledReads(final double maximumErrorPerBase) { + if (alleles.length == 0) + throw new IllegalStateException("unsupported for read-likelihood collections with no alleles"); + if (Double.isNaN(maximumErrorPerBase) || maximumErrorPerBase <= 0.0) + throw new IllegalArgumentException("the maximum error per base must be a positive number"); + final int sampleCount = samples.length; + + final Set readsToRemove = new HashSet<>(100); // rough estimate, can be improved? + for (int s = 0; s < sampleCount; s++) { + final GATKSAMRecord[] sampleReads = readsBySampleIndex[s]; + final int sampleReadCount = sampleReads.length; + for (int r = 0; r < sampleReadCount; r++) { + final GATKSAMRecord read = sampleReads[r]; + if (readIsPoorlyModelled(s,r,read, maximumErrorPerBase)) + readsToRemove.add(read); + } + } + removeReads(readsToRemove); + } + + // Check whether the read is poorly modelled. + protected boolean readIsPoorlyModelled(final int sampleIndex, final int readIndex, final GATKSAMRecord read, final double maxErrorRatePerBase) { + final double maxErrorsForRead = Math.min(2.0, Math.ceil(read.getReadLength() * maxErrorRatePerBase)); + final double log10QualPerBase = -4.0; + final double log10MaxLikelihoodForTrueAllele = maxErrorsForRead * log10QualPerBase; + + final int alleleCount = alleles.length; + final double[][] sampleValues = valuesBySampleIndex[sampleIndex]; + for (int a = 0; a < alleleCount; a++) + if (sampleValues[a][readIndex] >= log10MaxLikelihoodForTrueAllele) + return false; + return true; + } + + + /** + * Add more reads to the collection. + * + * @param readsBySample reads to add. 
+ * @param initialLikelihood the likelihood for the new entries. + * + * @throws IllegalArgumentException if {@code readsBySample} is {@code null} or {@code readsBySample} contains + * {@code null} reads, or {@code readsBySample} contains read that are already present in the read-likelihood + * collection. + */ + public void addReads(final Map> readsBySample, final double initialLikelihood) { + + for (final Map.Entry> entry : readsBySample.entrySet()) { + + final String sample = entry.getKey(); + final List newSampleReads = entry.getValue(); + final int sampleIndex = this.sampleIndex.getInt(sample); + + if (sampleIndex == -1) + throw new IllegalArgumentException("input sample " + sample + + " is not part of the read-likelihoods collection"); + + if (newSampleReads == null || newSampleReads.size() == 0) + continue; + + final int sampleReadCount = readsBySampleIndex[sampleIndex].length; + final int newSampleReadCount = sampleReadCount + newSampleReads.size(); + + appendReads(newSampleReads, sampleIndex, sampleReadCount, newSampleReadCount); + extendsLikelihoodArrays(initialLikelihood, sampleIndex, sampleReadCount, newSampleReadCount); + } + } + + // Extends the likelihood arrays-matrices. + private void extendsLikelihoodArrays(double initialLikelihood, int sampleIndex, int sampleReadCount, int newSampleReadCount) { + final double[][] sampleValues = valuesBySampleIndex[sampleIndex]; + final int alleleCount = alleles.length; + for (int a = 0; a < alleleCount; a++) + sampleValues[a] = Arrays.copyOf(sampleValues[a], newSampleReadCount); + if (initialLikelihood != 0.0) // the default array new value. + for (int a = 0; a < alleleCount; a++) + Arrays.fill(sampleValues[a],sampleReadCount,newSampleReadCount,initialLikelihood); + } + + // Append the new read reference into the structure per-sample. 
+ private void appendReads(final List newSampleReads, final int sampleIndex, + final int sampleReadCount, final int newSampleReadCount) { + final GATKSAMRecord[] sampleReads = readsBySampleIndex[sampleIndex] = + Arrays.copyOf(readsBySampleIndex[sampleIndex], newSampleReadCount); + + int nextReadIndex = sampleReadCount; + final Object2IntMap sampleReadIndex = readIndexBySampleIndex[sampleIndex]; + for (final GATKSAMRecord newRead : newSampleReads) { + if (sampleReadIndex.containsKey(newRead)) // might be worth handle this without exception (ignore the read?) but in practice should never be the case. + throw new IllegalArgumentException("you cannot add reads that are already in read-likelihood collection"); + sampleReads[nextReadIndex] = newRead; + sampleReadIndex.put(newRead,nextReadIndex++); + } + } + + /** + * Adds the non-reference allele to the read-likelihood collection setting each read likelihood to the second + * best found (or best one if only one allele has likelihood). + * + *

Nothing will happen if the read-likelihoods collection already includes the non-ref allele

+ * + *

+ * Implementation note: even when strictly speaking we do not need to demand the calling code to pass + * the reference the non-ref allele, we still demand it in order to lead the + * the calling code to use the right generic type for this likelihoods + * collection {@link Allele}. + *

+ * + * @param nonRefAllele the non-ref allele. + * + * @throws IllegalArgumentException if {@code nonRefAllele} is anything but the designated <NON_REF> + * symbolic allele {@link GATKVariantContextUtils#NON_REF_SYMBOLIC_ALLELE}. + */ + public void addNonReferenceAllele(final A nonRefAllele) { + + if (nonRefAllele == null) + throw new IllegalArgumentException("non-ref allele cannot be null"); + if (!nonRefAllele.equals(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE)) + throw new IllegalArgumentException("the non-ref allele is not valid"); + // Already present? + if (alleleIndex.containsKey(nonRefAllele)) + return; + + final int alleleCount = alleles.length; + final int newAlleleCount = alleleCount + 1; + alleles = Arrays.copyOf(alleles,newAlleleCount); + alleles[alleleCount] = nonRefAllele; + alleleIndex.put(nonRefAllele,alleleCount); + alleleList = null; // remove the cached alleleList. + + final int sampleCount = samples.length; + for (int s = 0; s < sampleCount; s++) + addNonReferenceAlleleLikelihoodsPerSample(alleleCount, newAlleleCount, s); + } + + // Updates per-sample structures according to the addition of the NON_REF allele. + private void addNonReferenceAlleleLikelihoodsPerSample(final int alleleCount, final int newAlleleCount, final int sampleIndex) { + final double[][] sampleValues = valuesBySampleIndex[sampleIndex] = Arrays.copyOf(valuesBySampleIndex[sampleIndex], newAlleleCount); + final int sampleReadCount = readsBySampleIndex[sampleIndex].length; + + final double[] nonRefAlleleLikelihoods = sampleValues[alleleCount] = new double [sampleReadCount]; + Arrays.fill(nonRefAlleleLikelihoods,Double.NEGATIVE_INFINITY); + for (int r = 0; r < sampleReadCount; r++) { + final BestAllele bestAllele = searchBestAllele(sampleIndex,r,true); + final double secondBestLikelihood = Double.isInfinite(bestAllele.confidence) ? 
bestAllele.likelihood + : bestAllele.likelihood - bestAllele.confidence; + nonRefAlleleLikelihoods[r] = secondBestLikelihood; + } + } + + /** + * Downsamples reads based on contamination fractions making sure that all alleles are affected proportionally. + * + * @param perSampleDownsamplingFraction contamination sample map where the sample name are the keys and the + * fractions are the values. + * + * @throws IllegalArgumentException if {@code perSampleDownsamplingFraction} is {@code null}. + */ + public void contaminationDownsampling(final Map perSampleDownsamplingFraction) { + + final int sampleCount = samples.length; + final Set readsToRemove = new HashSet<>(100); // blind estimate, can be improved? + for (int s = 0; s < sampleCount; s++) { + final String sample = samples[s]; + final Double fractionDouble = perSampleDownsamplingFraction.get(sample); + if (fractionDouble == null) + continue; + final double fraction = fractionDouble; + if (Double.isNaN(fraction) || fraction <= 0.0) + continue; + if (fraction >= 1.0) + readsToRemove.addAll(Arrays.asList(readsBySampleIndex[s])); + else { + final Map> readsByBestAllelesMap = readsByBestAlleleMap(s); + readsToRemove.addAll(AlleleBiasedDownsamplingUtils.selectAlleleBiasedReads(readsByBestAllelesMap, fraction)); + } + } + removeReads(readsToRemove); + } + + /** + * Returns the collection of best allele estimates for the reads based on the read-likelihoods. + * + * @throws IllegalStateException if there is no alleles. + * + * @return never {@code null}, one element per read in the read-likelihoods collection. + */ + public Collection bestAlleles() { + final List result = new ArrayList<>(100); // blind estimate. 
+ final int sampleCount = samples.length; + for (int s = 0; s < sampleCount; s++) { + final GATKSAMRecord[] sampleReads = readsBySampleIndex[s]; + final int readCount = sampleReads.length; + for (int r = 0; r < readCount; r++) + result.add(searchBestAllele(s,r,true)); + } + return result; + } + + /** + * Returns reads stratified by their best allele. + * @param sampleIndex the target sample. + * @return never {@code null}, perhaps empty. + */ + public Map> readsByBestAlleleMap(final int sampleIndex) { + checkSampleIndex(sampleIndex); + final int alleleCount = alleles.length; + final int sampleReadCount = readsBySampleIndex[sampleIndex].length; + final Map> result = new HashMap<>(alleleCount); + for (final A allele : alleles) + result.put(allele,new ArrayList(sampleReadCount)); + readsByBestAlleleMap(sampleIndex,result); + return result; + } + + /** + * Returns reads stratified by their best allele. + * @return never {@code null}, perhaps empty. + */ + @SuppressWarnings("unused") + public Map> readsByBestAlleleMap() { + final int alleleCount = alleles.length; + final Map> result = new HashMap<>(alleleCount); + final int totalReadCount = readCount(); + for (final A allele : alleles) + result.put(allele,new ArrayList(totalReadCount)); + final int sampleCount = samples.length; + for (int s = 0; s < sampleCount; s++) + readsByBestAlleleMap(s,result); + return result; + } + + private void readsByBestAlleleMap(final int sampleIndex, final Map> result) { + final GATKSAMRecord[] reads = readsBySampleIndex[sampleIndex]; + final int readCount = reads.length; + + for (int r = 0; r < readCount; r++) { + final BestAllele bestAllele = searchBestAllele(sampleIndex,r,true); + if (!bestAllele.isInformative()) + continue; + result.get(bestAllele.allele).add(bestAllele.read); + } + } + + /** + * Returns the index of a read within a sample read-likelihood sub collection. + * @param sampleIndex the sample index. + * @param read the query read. 
+ * @return -1 if there is no such read in that sample, 0 or greater otherwise. + */ + @SuppressWarnings("unused") + public int readIndex(final int sampleIndex, final GATKSAMRecord read) { + final Object2IntMap readIndex = readIndexBySampleIndex[sampleIndex]; + if (readIndex.containsKey(read)) + return readIndexBySampleIndex[sampleIndex].getInt(read); + else + return -1; + } + + /** + * Returns the total number of reads in the read-likelihood collection. + * + * @return never {@code null} + */ + public int readCount() { + int sum = 0; + for (int i = 0; i < samples.length; i++) + sum += readsBySampleIndex[i].length; + return sum; + } + + /** + * Returns the number of reads that belong to a sample in the read-likelihood collection. + * @param sampleIndex the query sample index. + * + * @throws IllegalArgumentException if {@code sampleIndex} is not a valid sample index. + * @return 0 or greater. + */ + public int sampleReadCount(int sampleIndex) { + checkSampleIndex(sampleIndex); + return readsBySampleIndex[sampleIndex].length; + } + + /** + * Contains information about the best allele for a read search result. + */ + public class BestAllele { + public static final double INFORMATIVE_THRESHOLD = 0.2; + + /** + * Null if there is no possible match (no allele?). + */ + public final A allele; + + /** + * The containing sample. + */ + public final String sample; + + /** + * The query read. + */ + public final GATKSAMRecord read; + + /** + * If allele != null, the indicates the likelihood of the read. + */ + public final double likelihood; + + /** + * Confidence that the read actually was generated under that likelihood. + * This is equal to the difference between this and the second best allele match. + */ + public final double confidence; + + private BestAllele(final int sampleIndex, final int readIndex, final int bestAlleleIndex, + final double likelihood, final double secondBestLikelihood) { + allele = bestAlleleIndex == -1 ? 
null : alleles[bestAlleleIndex]; + this.likelihood = likelihood; + sample = samples[sampleIndex]; + read = readsBySampleIndex[sampleIndex][readIndex]; + confidence = likelihood == secondBestLikelihood ? 0 : likelihood - secondBestLikelihood; + } + + public boolean isInformative() { + return confidence > INFORMATIVE_THRESHOLD; + } + } + + + /** + * Remove a set of reads from the read-likelihoods + * @param readsToRemove collection of reads to remove. + */ + @SuppressWarnings("unused") + public void removeReads(final Collection readsToRemove) { + if (readsToRemove == null) + throw new IllegalArgumentException("remove read collection cannot be null"); + if (readsToRemove.isEmpty()) + return; + removeReads(new HashSet<>(readsToRemove)); + } + + // Requires that the collection passed iterator can remove elements, and it can be modified. + private void removeReads(final Set readsToRemove) { + if (readsToRemove.isEmpty()) + return; + final int sampleCount = samples.length; + final int alleleCount = alleles.length; + for (int s = 0; s < sampleCount; s++) { + removeSampleReads(s, readsToRemove, alleleCount); + if (readsToRemove.isEmpty()) break; // each iteration may exhaust the reads to remove. + } + } + + // Requires that the collection passed iterator can remove elements, and it can be modified. + private void removeSampleReads(final int sampleIndex, final Set readsToRemove, final int alleleCount) { + final GATKSAMRecord[] sampleReads = readsBySampleIndex[sampleIndex]; + final int sampleReadCount = sampleReads.length; + + final Object2IntMap indexByRead = readIndexBySampleIndex[sampleIndex]; + // Count how many we are going to remove, which ones (indexes) and remove entry from the read-index map. + final boolean[] removeIndex = new boolean[sampleReadCount]; + int removeCount = 0; // captures the number of deletions. + int firstDeleted = sampleReadCount; // captures the first position that was deleted. 
+ + final Iterator readsToRemoveIterator = readsToRemove.iterator(); + while (readsToRemoveIterator.hasNext()) { + final GATKSAMRecord read = readsToRemoveIterator.next(); + if (indexByRead.containsKey(read)) { + final int index = indexByRead.getInt(read); + if (firstDeleted > index) + firstDeleted = index; + removeCount++; + removeIndex[index] = true; + readsToRemoveIterator.remove(); + indexByRead.remove(read); + } + } + + // Nothing to remove we just finish here. + if (removeCount == 0) + return; + + final int newSampleReadCount = sampleReadCount - removeCount; + + // Now we skim out the removed reads from the read array. + final GATKSAMRecord[] oldSampleReads = readsBySampleIndex[sampleIndex]; + final GATKSAMRecord[] newSampleReads = new GATKSAMRecord[newSampleReadCount]; + + System.arraycopy(oldSampleReads,0,newSampleReads,0,firstDeleted); + Utils.skimArray(oldSampleReads,firstDeleted, newSampleReads, firstDeleted, removeIndex, firstDeleted); + + // Update the indices for the extant reads from the first deletion onwards. + for (int r = firstDeleted; r < newSampleReadCount; r++) { + indexByRead.put(newSampleReads[r], r); + } + + // Then we skim out the likelihoods of the removed reads. + final double[][] oldSampleValues = valuesBySampleIndex[sampleIndex]; + final double[][] newSampleValues = new double[alleleCount][newSampleReadCount]; + for (int a = 0; a < alleleCount; a++) { + System.arraycopy(oldSampleValues[a],0,newSampleValues[a],0,firstDeleted); + Utils.skimArray(oldSampleValues[a], firstDeleted, newSampleValues[a], firstDeleted, removeIndex, firstDeleted); + } + valuesBySampleIndex[sampleIndex] = newSampleValues; + readsBySampleIndex[sampleIndex] = newSampleReads; + readListBySampleIndex[sampleIndex] = null; // reset the unmodifiable list. + } + + /** + * Transform into a multi-sample HashMap backed {@link PerReadAlleleLikelihoodMap} type. + * @return never {@code null}. 
+ * + */ + @Deprecated + @SuppressWarnings("all") + public Map toPerReadAlleleLikelihoodMap () { + final int sampleCount = samples.length; + final Map result = new HashMap<>(sampleCount); + for (int s = 0; s < sampleCount; s++) + result.put(samples[s],toPerReadAlleleLikelihoodMap(s)); + return result; + } + + /** + * Transform into a single-sample HashMap backed {@link PerReadAlleleLikelihoodMap} type. + * + * @return never {@code null}. + */ + @Deprecated + public PerReadAlleleLikelihoodMap toPerReadAlleleLikelihoodMap(final int sampleIndex) { + checkSampleIndex(sampleIndex); + final PerReadAlleleLikelihoodMap result = new PerReadAlleleLikelihoodMap(); + final int alleleCount = alleles.length; + final GATKSAMRecord[] sampleReads = readsBySampleIndex[sampleIndex]; + final int sampleReadCount = sampleReads.length; + for (int a = 0; a < alleleCount; a++) { + final A allele = alleles[a]; + final double[] readLikelihoods = valuesBySampleIndex[sampleIndex][a]; + for (int r = 0; r < sampleReadCount; r++) + result.add(sampleReads[r], allele, readLikelihoods[r]); + } + return result; + } + + /** + * Implements a likelihood matrix per sample given its index. + */ + private class SampleMatrix implements Matrix
{ + + private final int sampleIndex; + + private SampleMatrix(final int sampleIndex) { + this.sampleIndex = sampleIndex; + } + + @Override + public List reads() { + return sampleReads(sampleIndex); + } + + @Override + public List alleles() { + return ReadLikelihoods.this.alleles(); + } + + @Override + public void set(final int alleleIndex, final int readIndex, final double value) { + valuesBySampleIndex[sampleIndex][alleleIndex][readIndex] = value; + } + + @Override + public double get(final int alleleIndex, final int readIndex) { + return valuesBySampleIndex[sampleIndex][alleleIndex][readIndex]; + } + + @Override + public int alleleIndex(final A allele) { + return ReadLikelihoods.this.alleleIndex(allele); + } + + @Override + public int readIndex(final GATKSAMRecord read) { + return ReadLikelihoods.this.readIndex(sampleIndex,read); + } + + @Override + public int alleleCount() { + return alleles.length; + } + + @Override + public int readCount() { + return readsBySampleIndex[sampleIndex].length; + } + + @Override + public A allele(int alleleIndex) { + return ReadLikelihoods.this.allele(alleleIndex); + } + + @Override + public GATKSAMRecord read(final int readIndex) { + if (readIndex < 0) + throw new IllegalArgumentException("the read-index cannot be negative"); + final GATKSAMRecord[] sampleReads = readsBySampleIndex[sampleIndex]; + if (readIndex >= sampleReads.length) + throw new IllegalArgumentException("the read-index is beyond the read count of the sample"); + return sampleReads[readIndex]; + } + } + + /** + * Checks whether the provide sample index is valid. + *

+ * If not, it throws an exception. + *

+ * @param sampleIndex the target sample index. + * + * @throws IllegalArgumentException if {@code sampleIndex} is invalid, i.e. outside the range [0,{@link #sampleCount}). + */ + private void checkSampleIndex(final int sampleIndex) { + if (sampleIndex < 0 || sampleIndex >= samples.length) + throw new IllegalArgumentException("invalid sample index: " + sampleIndex); + } + + /** + * Checks whether the provide allele index is valid. + *

+ * If not, it throws an exception. + *

+ * @param alleleIndex the target sample index. + * + * @throws IllegalArgumentException if {@code alleleIndex} is invalid, i.e. outside the range [0,{@link #sampleCount}). + */ + private void checkAlleleIndex(final int alleleIndex) { + if (alleleIndex < 0 || alleleIndex >= alleles.length) + throw new IllegalArgumentException("invalid allele index: " + alleleIndex); + } +} \ No newline at end of file