From 09ac3779d63625f390f42a50be8b5a72e84c7718 Mon Sep 17 00:00:00 2001
From: Valentin Ruano-Rubio
+ * If the provided {@code dest} array is not long enough, a new one will be created and returned with the
+ * same component type. All elements before {@code destOffset} will be copied from the input to the
+ * result array. If {@code dest} is {@code null}, a brand-new array large enough will be created where
+ * the positions preceding {@code destOffset} will be left with the default value. The component type
+ * will match the one of the {@code source} array.
+ *
+ * If the provided {@code dest} array is not long enough, a new one will be created and returned with the
+ * same component type. All elements before {@code destOffset} will be copied from the input to the
+ * result array. If {@code dest} is {@code null}, a brand-new array large enough will be created where
+ * the positions preceding {@code destOffset} will be left with the default value. The component type
+ * will match the one of the {@code source} array.
+ *
+ * While you can use {@link Arrays#copyOf} or an array's {@link Object#clone()} to create a copy of an array,
+ * here, if the array is multi-dimensional, each sub-array or matrix is cloned as well.
+ *
+ * Notice, however, that if the base type is an Object type, the base elements themselves won't be cloned.
+ *
+ * valuesBySampleIndex[s][a][r] == lnLk(R_r | A_a) where R_r comes from Sample s.
+ *
+ * The initial likelihoods for all allele-read combinations are
+ * 0.
+ *
+ * The matrix is indexed by allele and then by read index.
+ *
+ *
+ * result[a][r] == lnLk(Read_r | Allele_a)
+ *
+ *
+ * The matrix is live: changes to it update the likelihoods in the collection, so please use it with care. + *
+ * + * @param sampleIndex the sample index. + * + * @return never {@code null}. + */ + /* package */ double[][] sampleValues(final int sampleIndex) { + checkSampleIndex(sampleIndex); + return valuesBySampleIndex[sampleIndex]; + } + + + /** + * Returns a read vs allele likelihood matrix corresponding to a sample. + * + * The matrix is created the first time it is requested for a sample; the same instance is returned afterwards. + * + * @param sampleIndex target sample. + * + * @throws IllegalArgumentException if {@code sampleIndex} is not a valid sample index. + * + * @return never {@code null} + */ + public Matrix sampleMatrix(final int sampleIndex) { + checkSampleIndex(sampleIndex); + final Matrix extantResult = sampleMatrices[sampleIndex]; + if (extantResult != null) + return extantResult; + else + return sampleMatrices[sampleIndex] = new SampleMatrix(sampleIndex); + } + + /** + * Adjusts likelihoods so that for each read, the best allele likelihood is 0 and caps the minimum likelihood + * of any allele for each read based on the maximum alternative allele likelihood. + * + * Nothing is changed when there are no alleles, nor, if {@code bestToZero} is {@code false}, when there is + * a single allele or the cap is negative infinity. + * + * @param bestToZero set the best likelihood to 0, others will be subtracted the same amount. + * @param maximumLikelihoodDifferenceCap maximum difference between the best alternative allele likelihood + * and any other likelihood. Must be strictly negative. + * + * @throws IllegalArgumentException if {@code maximumLikelihoodDifferenceCap} is {@code NaN}, zero or positive. + */ + public void normalizeLikelihoods(final boolean bestToZero, final double maximumLikelihoodDifferenceCap) { + if (maximumLikelihoodDifferenceCap >= 0.0 || Double.isNaN(maximumLikelihoodDifferenceCap)) + throw new IllegalArgumentException("the minimum reference likelihood fall cannot be positive"); + + if (maximumLikelihoodDifferenceCap == Double.NEGATIVE_INFINITY && !bestToZero) + return; + + final int alleleCount = alleles.length; + if (alleleCount == 0) // trivial case: there are no alleles.
+ return; + else if (alleleCount == 1 && !bestToZero) + return; + + for (int s = 0; s < valuesBySampleIndex.length; s++) { + final double[][] sampleValues = valuesBySampleIndex[s]; + final int readCount = readsBySampleIndex[s].length; + for (int r = 0; r < readCount; r++) + normalizeLikelihoodsPerRead(bestToZero, maximumLikelihoodDifferenceCap, sampleValues, s, r); + } + } + + // Does the normalizeLikelihoods job for each read. + private void normalizeLikelihoodsPerRead(final boolean bestToZero, final double maximumBestAltLikelihoodDifference, + final double[][] sampleValues, final int sampleIndex, final int readIndex) { + + final BestAllele bestAlternativeAllele = searchBestAllele(sampleIndex,readIndex,false); + + final double worstLikelihoodCap = bestAlternativeAllele.likelihood + maximumBestAltLikelihoodDifference; + + final double referenceLikelihood = referenceAlleleIndex == -1 ? Double.NEGATIVE_INFINITY : + sampleValues[referenceAlleleIndex][readIndex]; + + + final double bestAbsoluteLikelihood = Math.max(bestAlternativeAllele.likelihood,referenceLikelihood); + + if (bestToZero) { + if (bestAbsoluteLikelihood == Double.NEGATIVE_INFINITY) + for (int a = 0; a < alleles.length; a++) + sampleValues[a][readIndex] = 0; + else if (worstLikelihoodCap != Double.NEGATIVE_INFINITY) + for (int a = 0; a < alleles.length; a++) + sampleValues[a][readIndex] = (sampleValues[a][readIndex] < worstLikelihoodCap ? worstLikelihoodCap : sampleValues[a][readIndex]) - bestAbsoluteLikelihood; + else + for (int a = 0; a < alleles.length; a++) + sampleValues[a][readIndex] -= bestAbsoluteLikelihood; + } else // else if (maximumReferenceLikelihoodFall != Double.NEGATIVE_INFINITY ) { // + // Guaranteed to be the case by enclosing code. + for (int a = 0; a < alleles.length; a++) + if (sampleValues[a][readIndex] < worstLikelihoodCap) + sampleValues[a][readIndex] = worstLikelihoodCap; + } + + /** + * Returns the samples in this read-likelihood collection.
+ * <p> + * Samples are sorted by their index in the collection. + * </p>
+ * + * <p> + * The returned list is an unmodifiable view on the read-likelihoods sample list. + * </p>
+ * + * @return never {@code null}. + */ + public List+ * Samples are sorted by their index in the collection. + *
+ * + * <p> + * The returned list is unmodifiable. It will not be updated if the collection + * allele list changes. + * </p>
+ * @return never {@code null}. + */ + public List alleles() { + if (alleleList == null) + alleleList = Collections.unmodifiableList(Arrays.asList(alleles)); + return alleleList; + } + + /** + * Searches the best allele for a read. + * + * @param sampleIndex index of the sample that includes the read. + * @param readIndex target read index. + * @param canBeReference whether the reference allele may be reported as the best allele; if {@code false} + * the allele at the reference allele index is skipped. + * + * @return never {@code null}, but with {@link BestAllele#allele allele} == {@code null} + * if none could be found. + */ + private BestAllele searchBestAllele(final int sampleIndex, final int readIndex, final boolean canBeReference) { + final int alleleCount = alleles.length; + if (alleleCount == 0 || (alleleCount == 1 && referenceAlleleIndex == 0 && !canBeReference)) + return new BestAllele(sampleIndex,readIndex,-1,Double.NEGATIVE_INFINITY,Double.NEGATIVE_INFINITY); + + final double[][] sampleValues = valuesBySampleIndex[sampleIndex]; + int bestAlleleIndex = canBeReference || referenceAlleleIndex != 0 ? 0 : 1; + + double bestLikelihood = sampleValues[bestAlleleIndex][readIndex]; + double secondBestLikelihood = Double.NEGATIVE_INFINITY; + for (int a = bestAlleleIndex + 1; a < alleleCount; a++) { + if (!canBeReference && referenceAlleleIndex == a) + continue; + final double candidateLikelihood = sampleValues[a][readIndex]; + if (candidateLikelihood > bestLikelihood) { + bestAlleleIndex = a; + secondBestLikelihood = bestLikelihood; + bestLikelihood = candidateLikelihood; + } else if (candidateLikelihood > secondBestLikelihood) { + secondBestLikelihood = candidateLikelihood; + } + } + return new BestAllele(sampleIndex,readIndex,bestAlleleIndex,bestLikelihood,secondBestLikelihood); + } + + public void changeReads(final Map+ * This method modifies the current read-likelihoods collection. + *
+ * + * @param location the target location. + * + * @throws IllegalArgumentException the location cannot be {@code null} nor unmapped. + */ + @SuppressWarnings("unused") + public void filterToOnlyOverlappingUnclippedReads(final GenomeLoc location) { + if (location == null) + throw new IllegalArgumentException("the location cannot be null"); + if (location.isUnmapped()) + throw new IllegalArgumentException("the location cannot be unmapped"); + + final int sampleCount = samples.length; + + final int locContig = location.getContigIndex(); + final int locStart = location.getStart(); + final int locEnd = location.getStop(); + + final Set+ * This is determined by a maximum error per read-base against the best likelihood possible. + *
+ * + * @param maximumErrorPerBase the minimum acceptable error rate per read base, must be + * a positive number. + * + * @throws IllegalStateException is not supported for read-likelihood that do not contain alleles. + * + * @throws IllegalArgumentException if {@code maximumErrorPerBase} is negative. + */ + public void filterPoorlyModeledReads(final double maximumErrorPerBase) { + if (alleles.length == 0) + throw new IllegalStateException("unsupported for read-likelihood collections with no alleles"); + if (Double.isNaN(maximumErrorPerBase) || maximumErrorPerBase <= 0.0) + throw new IllegalArgumentException("the maximum error per base must be a positive number"); + final int sampleCount = samples.length; + + final SetNothing will happen if the read-likelihoods collection already includes the non-ref allele
+ * + * <p> + * Implementation note: even though, strictly speaking, we do not need the calling code to pass + * a reference to the non-ref allele, we still demand it in order to lead + * the calling code to use the right generic type for this likelihoods + * collection {@link Allele}. + * </p>
+ * + * @param nonRefAllele the non-ref allele. + * + * @throws IllegalArgumentException if {@code nonRefAllele} is anything but the designated <NON_REF> + * symbolic allele {@link GATKVariantContextUtils#NON_REF_SYMBOLIC_ALLELE}. + */ + public void addNonReferenceAllele(final A nonRefAllele) { + + if (nonRefAllele == null) + throw new IllegalArgumentException("non-ref allele cannot be null"); + if (!nonRefAllele.equals(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE)) + throw new IllegalArgumentException("the non-ref allele is not valid"); + // Already present? + if (alleleIndex.containsKey(nonRefAllele)) + return; + + final int alleleCount = alleles.length; + final int newAlleleCount = alleleCount + 1; + alleles = Arrays.copyOf(alleles,newAlleleCount); + alleles[alleleCount] = nonRefAllele; + alleleIndex.put(nonRefAllele,alleleCount); + alleleList = null; // remove the cached alleleList. + + final int sampleCount = samples.length; + for (int s = 0; s < sampleCount; s++) + addNonReferenceAlleleLikelihoodsPerSample(alleleCount, newAlleleCount, s); + } + + // Updates per-sample structures according to the addition of the NON_REF allele. + private void addNonReferenceAlleleLikelihoodsPerSample(final int alleleCount, final int newAlleleCount, final int sampleIndex) { + final double[][] sampleValues = valuesBySampleIndex[sampleIndex] = Arrays.copyOf(valuesBySampleIndex[sampleIndex], newAlleleCount); + final int sampleReadCount = readsBySampleIndex[sampleIndex].length; + + final double[] nonRefAlleleLikelihoods = sampleValues[alleleCount] = new double [sampleReadCount]; + Arrays.fill(nonRefAlleleLikelihoods,Double.NEGATIVE_INFINITY); + for (int r = 0; r < sampleReadCount; r++) { + final BestAllele bestAllele = searchBestAllele(sampleIndex,r,true); + final double secondBestLikelihood = Double.isInfinite(bestAllele.confidence) ? 
bestAllele.likelihood + : bestAllele.likelihood - bestAllele.confidence; + /* The new allele's row gets, per read, the value derived above from the best allele search (reference included). */ + nonRefAlleleLikelihoods[r] = secondBestLikelihood; + } + } + + /** + * Downsamples reads based on contamination fractions making sure that all alleles are affected proportionally. + * + * @param perSampleDownsamplingFraction contamination sample map where the sample names are the keys and the + * fractions are the values. + * + * @throws IllegalArgumentException if {@code perSampleDownsamplingFraction} is {@code null}. + */ + public void contaminationDownsampling(final Map+ * If not, it throws an exception. + *
+ * @param sampleIndex the target sample index. + * + * @throws IllegalArgumentException if {@code sampleIndex} is invalid, i.e. outside the range [0,{@link #sampleCount}). + */ + private void checkSampleIndex(final int sampleIndex) { + if (sampleIndex < 0 || sampleIndex >= samples.length) + throw new IllegalArgumentException("invalid sample index: " + sampleIndex); + } + + /** + * Checks whether the provided allele index is valid. + * <p> + * If not, it throws an exception. + * </p>
+ * @param alleleIndex the target allele index. + * + * @throws IllegalArgumentException if {@code alleleIndex} is invalid, i.e. outside the range [0, number of alleles). + */ + private void checkAlleleIndex(final int alleleIndex) { + if (alleleIndex < 0 || alleleIndex >= alleles.length) + throw new IllegalArgumentException("invalid allele index: " + alleleIndex); + } +} \ No newline at end of file