remove alt alleles, when genotype count is explosively large, based on alleles' highest supporting haplotype score; max tolerable genotype count is controlled by a default value overridable by user
remove alt alleles, when genotype count is explosively large, based on alleles' highest supporting haplotype score; max tolerable genotype count is controlled by a default value overridable by user
This commit is contained in:
parent
6471bda0cd
commit
1ff234e7dd
|
|
@ -122,12 +122,32 @@ public class GenotypeCalculationArgumentCollection implements Cloneable{
|
||||||
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
|
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
|
||||||
* that you not play around with this parameter.
|
* that you not play around with this parameter.
|
||||||
*
|
*
|
||||||
* As of GATK 2.2 the genotyper can handle a very large number of events, so the default maximum has been increased to 6.
|
* See also {@link #MAX_GENOTYPE_COUNT}.
|
||||||
*/
|
*/
|
||||||
@Advanced
|
@Advanced
|
||||||
@Argument(fullName = "max_alternate_alleles", shortName = MAX_ALTERNATE_ALLELES_SHORT_NAME, doc = "Maximum number of alternate alleles to genotype", required = false)
|
@Argument(fullName = "max_alternate_alleles", shortName = MAX_ALTERNATE_ALLELES_SHORT_NAME, doc = "Maximum number of alternate alleles to genotype", required = false)
|
||||||
public int MAX_ALTERNATE_ALLELES = 6;
|
public int MAX_ALTERNATE_ALLELES = 6;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If there are more than this number of genotypes at a locus presented to the genotyper, then only this many genotypes will be used.
|
||||||
|
* The possible genotypes are simply different ways of partitioning alleles given a specific ploidy asumption.
|
||||||
|
* Therefore, we remove genotypes from consideration by removing alternate alleles that are the least well supported.
|
||||||
|
* The estimate of allele support is based on the ranking of the candidate haplotypes coming out of the graph building step.
|
||||||
|
* Note that the reference allele is always kept.
|
||||||
|
*
|
||||||
|
* Note that genotyping sites with large genotype counts is both CPU and memory intensive.
|
||||||
|
* Unless there is a good reason to change the default value, we highly recommend that you not play around with this parameter.
|
||||||
|
*
|
||||||
|
* The maximum number of alternative alleles used in the genotyping step will be the lesser of the two:
|
||||||
|
* 1. the largest number of alt alleles, given ploidy, that yields a genotype count no higher than {@link #MAX_GENOTYPE_COUNT}
|
||||||
|
* 2. the value of {@link #MAX_ALTERNATE_ALLELES}
|
||||||
|
*
|
||||||
|
* See also {@link #MAX_ALTERNATE_ALLELES}.
|
||||||
|
*/
|
||||||
|
@Advanced
|
||||||
|
@Argument(fullName = "max_genotype_count", shortName = "maxGT", doc = "Maximum number of genotypes to consider at any site", required = false)
|
||||||
|
public int MAX_GENOTYPE_COUNT = 1024;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Determines the maximum number of PL values that will be logged in the output. If the number of genotypes
|
* Determines the maximum number of PL values that will be logged in the output. If the number of genotypes
|
||||||
* (which is determined by the ploidy and the number of alleles) exceeds the value provided by this argument,
|
* (which is determined by the ploidy and the number of alleles) exceeds the value provided by this argument,
|
||||||
|
|
|
||||||
|
|
@ -163,7 +163,7 @@ public class GenotypeLikelihoodCalculator {
|
||||||
* <p>This is in fact a shallow copy if {@link GenotypeLikelihoodCalculators#ploidyLog10}</p> and is not meant to be modified by
|
* <p>This is in fact a shallow copy if {@link GenotypeLikelihoodCalculators#ploidyLog10}</p> and is not meant to be modified by
|
||||||
* this class. </p>
|
* this class. </p>
|
||||||
*/
|
*/
|
||||||
private final double[] log10;
|
private final double[] ploidyLog10;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Buffer field use as a temporal container for sorted allele counts when calculating the likelihood of a
|
* Buffer field use as a temporal container for sorted allele counts when calculating the likelihood of a
|
||||||
|
|
@ -202,24 +202,22 @@ public class GenotypeLikelihoodCalculator {
|
||||||
* Creates a new calculator providing its ploidy and number of genotyping alleles.
|
* Creates a new calculator providing its ploidy and number of genotyping alleles.
|
||||||
*/
|
*/
|
||||||
protected GenotypeLikelihoodCalculator(final int ploidy, final int alleleCount,
|
protected GenotypeLikelihoodCalculator(final int ploidy, final int alleleCount,
|
||||||
final int[][] alleleFirstGenotypeOffsetByPloidy,
|
final int[][] alleleFirstGenotypeOffsetByPloidy,
|
||||||
final GenotypeAlleleCounts[][] genotypeTableByPloidy,
|
final GenotypeAlleleCounts[][] genotypeTableByPloidy,
|
||||||
final double[] ploidyLog10) {
|
final double[] ploidyLog10) {
|
||||||
|
|
||||||
this.alleleFirstGenotypeOffsetByPloidy = alleleFirstGenotypeOffsetByPloidy;
|
this.alleleFirstGenotypeOffsetByPloidy = alleleFirstGenotypeOffsetByPloidy;
|
||||||
genotypeAlleleCounts = genotypeTableByPloidy[ploidy];
|
genotypeAlleleCounts = genotypeTableByPloidy[ploidy];
|
||||||
this.alleleCount = alleleCount;
|
this.alleleCount = alleleCount;
|
||||||
this.ploidy = ploidy;
|
this.ploidy = ploidy;
|
||||||
genotypeCount = this.alleleFirstGenotypeOffsetByPloidy[ploidy][alleleCount];
|
genotypeCount = this.alleleFirstGenotypeOffsetByPloidy[ploidy][alleleCount];
|
||||||
if (genotypeCount == GenotypeLikelihoodCalculators.GENOTYPE_COUNT_OVERFLOW)
|
|
||||||
throw new IllegalArgumentException(
|
|
||||||
String.format("the combination of ploidy (%s) and number of alleles (%s) results in a very large number of genotypes (> %s). You need to limit ploidy or the number of alternative alleles to analyze this locus",
|
|
||||||
ploidy,alleleCount,Integer.MAX_VALUE));
|
|
||||||
alleleHeap = new IntMaxHeap(ploidy);
|
alleleHeap = new IntMaxHeap(ploidy);
|
||||||
readLikelihoodsByGenotypeIndex = new double[genotypeCount][];
|
readLikelihoodsByGenotypeIndex = new double[genotypeCount][];
|
||||||
log10 = ploidyLog10;
|
this.ploidyLog10 = ploidyLog10;
|
||||||
// The number of possible components is limited by distinct allele count and ploidy.
|
// The number of possible components is limited by distinct allele count and ploidy.
|
||||||
maximumDistinctAllelesInGenotype = Math.min(ploidy, alleleCount);
|
maximumDistinctAllelesInGenotype = Math.min(ploidy, alleleCount);
|
||||||
genotypeAllelesAndCounts = new int[maximumDistinctAllelesInGenotype << 1];
|
genotypeAllelesAndCounts = new int[maximumDistinctAllelesInGenotype*2];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -349,7 +347,7 @@ public class GenotypeLikelihoodCalculator {
|
||||||
*/
|
*/
|
||||||
private double[] genotypeLikelihoods(final double[][] readLikelihoodsByGenotypeIndex, final int readCount) {
|
private double[] genotypeLikelihoods(final double[][] readLikelihoodsByGenotypeIndex, final int readCount) {
|
||||||
final double[] result = new double[genotypeCount];
|
final double[] result = new double[genotypeCount];
|
||||||
final double denominator = readCount * log10[ploidy]; // instead of dividing each read likelihood by ploidy
|
final double denominator = readCount * ploidyLog10[ploidy]; // instead of dividing each read likelihood by ploidy
|
||||||
// ( so subtract log10(ploidy) ) we multiply them all and the divide by ploidy^readCount (so substract readCount * log10(ploidy) )
|
// ( so subtract log10(ploidy) ) we multiply them all and the divide by ploidy^readCount (so substract readCount * log10(ploidy) )
|
||||||
for (int g = 0; g < genotypeCount; g++) {
|
for (int g = 0; g < genotypeCount; g++) {
|
||||||
final double[] likelihoodsByRead = readLikelihoodsByGenotypeIndex[g];
|
final double[] likelihoodsByRead = readLikelihoodsByGenotypeIndex[g];
|
||||||
|
|
@ -464,7 +462,9 @@ public class GenotypeLikelihoodCalculator {
|
||||||
* exactly one allele present in the genotype.
|
* exactly one allele present in the genotype.
|
||||||
*/
|
*/
|
||||||
private void singleComponentGenotypeLikelihoodByRead(final GenotypeAlleleCounts genotypeAlleleCounts,
|
private void singleComponentGenotypeLikelihoodByRead(final GenotypeAlleleCounts genotypeAlleleCounts,
|
||||||
final double[] likelihoodByRead, final double[] readLikelihoodComponentsByAlleleCount, final int readCount) {
|
final double[] likelihoodByRead,
|
||||||
|
final double[] readLikelihoodComponentsByAlleleCount,
|
||||||
|
final int readCount) {
|
||||||
final int allele = genotypeAlleleCounts.alleleIndexAt(0);
|
final int allele = genotypeAlleleCounts.alleleIndexAt(0);
|
||||||
// the count of the only component must be = ploidy.
|
// the count of the only component must be = ploidy.
|
||||||
int offset = (allele * (ploidy + 1) + ploidy) * readCount;
|
int offset = (allele * (ploidy + 1) + ploidy) * readCount;
|
||||||
|
|
@ -493,7 +493,7 @@ public class GenotypeLikelihoodCalculator {
|
||||||
|
|
||||||
// p = 2 because the frequency == 1 we already have it.
|
// p = 2 because the frequency == 1 we already have it.
|
||||||
for (int frequency = 2, destinationOffset = frequency1Offset + readCount; frequency <= ploidy; frequency++) {
|
for (int frequency = 2, destinationOffset = frequency1Offset + readCount; frequency <= ploidy; frequency++) {
|
||||||
final double log10frequency = log10[frequency];
|
final double log10frequency = ploidyLog10[frequency];
|
||||||
for (int r = 0, sourceOffset = frequency1Offset; r < readCount; r++)
|
for (int r = 0, sourceOffset = frequency1Offset; r < readCount; r++)
|
||||||
readAlleleLikelihoodByAlleleCount[destinationOffset++] =
|
readAlleleLikelihoodByAlleleCount[destinationOffset++] =
|
||||||
readAlleleLikelihoodByAlleleCount[sourceOffset++] + log10frequency;
|
readAlleleLikelihoodByAlleleCount[sourceOffset++] + log10frequency;
|
||||||
|
|
@ -620,7 +620,11 @@ public class GenotypeLikelihoodCalculator {
|
||||||
* @param destination where to store the new genotype index mapping to old.
|
* @param destination where to store the new genotype index mapping to old.
|
||||||
* @param sortedAlleleCountsBuffer a buffer to re-use to get the genotype-allele-count's sorted allele counts.
|
* @param sortedAlleleCountsBuffer a buffer to re-use to get the genotype-allele-count's sorted allele counts.
|
||||||
*/
|
*/
|
||||||
private void genotypeIndexMapPerGenotypeIndex(final int newGenotypeIndex, final GenotypeAlleleCounts alleleCounts, final int[] oldToNewAlleleIndexMap, final int[] destination, final int[] sortedAlleleCountsBuffer) {
|
private void genotypeIndexMapPerGenotypeIndex(final int newGenotypeIndex,
|
||||||
|
final GenotypeAlleleCounts alleleCounts,
|
||||||
|
final int[] oldToNewAlleleIndexMap,
|
||||||
|
final int[] destination,
|
||||||
|
final int[] sortedAlleleCountsBuffer) {
|
||||||
final int distinctAlleleCount = alleleCounts.distinctAlleleCount();
|
final int distinctAlleleCount = alleleCounts.distinctAlleleCount();
|
||||||
alleleCounts.copyAlleleCounts(sortedAlleleCountsBuffer,0);
|
alleleCounts.copyAlleleCounts(sortedAlleleCountsBuffer,0);
|
||||||
for (int j = 0, jj = 0; j < distinctAlleleCount; j++) {
|
for (int j = 0, jj = 0; j < distinctAlleleCount; j++) {
|
||||||
|
|
|
||||||
|
|
@ -51,7 +51,11 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.genotyper;
|
package org.broadinstitute.gatk.tools.walkers.genotyper;
|
||||||
|
|
||||||
|
import org.broadinstitute.gatk.utils.MathUtils;
|
||||||
|
import org.broadinstitute.gatk.utils.exceptions.GATKException;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.stream.IntStream;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Genotype likelihood calculator utility.
|
* Genotype likelihood calculator utility.
|
||||||
|
|
@ -116,7 +120,10 @@ public class GenotypeLikelihoodCalculators {
|
||||||
private volatile static GenotypeAlleleCounts[][] genotypeTableByPloidy =
|
private volatile static GenotypeAlleleCounts[][] genotypeTableByPloidy =
|
||||||
buildGenotypeAlleleCountsTable(maximumPloidy,maximumAllele,alleleFirstGenotypeOffsetByPloidy);
|
buildGenotypeAlleleCountsTable(maximumPloidy,maximumAllele,alleleFirstGenotypeOffsetByPloidy);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cached log10 values for the first integer up to the maximum ploidy requested thus far.
|
||||||
|
*/
|
||||||
|
private volatile static double[] ploidyLog10 = IntStream.range(0, maximumPloidy + 1).mapToDouble(Math::log10).toArray();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Build the table with the genotype offsets based on ploidy and the maximum allele index with representation
|
* Build the table with the genotype offsets based on ploidy and the maximum allele index with representation
|
||||||
|
|
@ -291,40 +298,29 @@ public class GenotypeLikelihoodCalculators {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Cached log10 values for the first integer up to the maximum ploidy requested thus far.
|
|
||||||
*/
|
|
||||||
private volatile static double[] ploidyLog10;
|
|
||||||
|
|
||||||
// Initialize {@link #ploidyLog10}.
|
|
||||||
static {
|
|
||||||
ploidyLog10 = new double[maximumPloidy + 1];
|
|
||||||
for (int i = 0; i <= maximumPloidy; i++)
|
|
||||||
ploidyLog10[i] = Math.log10(i);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns an instance given its ploidy and the number of alleles.
|
* Returns an instance given its ploidy and the number of alleles.
|
||||||
*
|
*
|
||||||
* @param alleleCount the required allele-count.
|
* @param alleleCount the required allele-count.
|
||||||
* @param ploidy the required ploidy-count.
|
* @param ploidy the required ploidy-count.
|
||||||
*
|
*
|
||||||
* @throws IllegalArgumentException if either {@code ploidy} or {@code alleleCount} is {@code null}, or
|
* @throws IllegalArgumentException if either {@code ploidy} or {@code alleleCount} is negative, or the resulting number of genotypes is too large.
|
||||||
* the resulting number of genotypes is too large.
|
|
||||||
*
|
*
|
||||||
* @return never {@code null}.
|
* @return never {@code null}.
|
||||||
*/
|
*/
|
||||||
public static GenotypeLikelihoodCalculator getInstance(final int ploidy,
|
public static GenotypeLikelihoodCalculator getInstance(final int ploidy, final int alleleCount) {
|
||||||
final int alleleCount) {
|
|
||||||
checkPloidyAndMaximumAllele(ploidy, alleleCount);
|
checkPloidyAndMaximumAllele(ploidy, alleleCount);
|
||||||
|
|
||||||
// Non-thread safe (fast) check on tables capacities,
|
// Non-thread safe (fast) check on tables capacities,
|
||||||
// if not enough capacity we expand the tables in a thread-safe manner:
|
// if not enough capacity we expand the tables in a thread-safe manner
|
||||||
if (alleleCount > maximumAllele || ploidy > maximumPloidy)
|
// also checks if the requested ploidy and allele count result in a genotype count too large to deal with
|
||||||
ensureCapacity(alleleCount, ploidy);
|
if(calculateGenotypeCountUsingTables(ploidy, alleleCount) == GENOTYPE_COUNT_OVERFLOW){
|
||||||
|
final double largeGenotypeCount = MathUtils.binomialCoefficient(ploidy + alleleCount - 1, alleleCount - 1);
|
||||||
|
throw new IllegalArgumentException(String.format("the number of genotypes is too large for ploidy %d and allele %d: approx. %.0f", ploidy, alleleCount, largeGenotypeCount));
|
||||||
|
}
|
||||||
|
|
||||||
// At this point the tables must have at least the requested capacity, likely to be much more.
|
// At this point the tables must have at least the requested capacity, likely to be much more.
|
||||||
return new GenotypeLikelihoodCalculator(ploidy,alleleCount,alleleFirstGenotypeOffsetByPloidy,genotypeTableByPloidy,ploidyLog10);
|
return new GenotypeLikelihoodCalculator(ploidy, alleleCount, alleleFirstGenotypeOffsetByPloidy, genotypeTableByPloidy, ploidyLog10);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -413,14 +409,59 @@ public class GenotypeLikelihoodCalculators {
|
||||||
* @param ploidy the requested ploidy.
|
* @param ploidy the requested ploidy.
|
||||||
* @param alleleCount the requested number of alleles.
|
* @param alleleCount the requested number of alleles.
|
||||||
*
|
*
|
||||||
* @throws IllegalArgumentException if {@code ploidy} or {@code alleleCount} is negative.
|
* @throws IllegalArgumentException if {@code ploidy} or {@code alleleCount} is negative or
|
||||||
|
* the number of genotypes is too large (more than {@link Integer#MAX_VALUE}).
|
||||||
*
|
*
|
||||||
* @return 0 or greater.
|
* @return the number of genotypes given ploidy and allele count (0 or greater).
|
||||||
*/
|
*/
|
||||||
public final static int genotypeCount(final int ploidy, final int alleleCount) {
|
public final static int genotypeCount(final int ploidy, final int alleleCount) {
|
||||||
|
|
||||||
|
final int result = calculateGenotypeCountUsingTables(ploidy, alleleCount);
|
||||||
|
if (result == GENOTYPE_COUNT_OVERFLOW) {
|
||||||
|
final double largeGenotypeCount = MathUtils.binomialCoefficient(ploidy + alleleCount - 1, alleleCount - 1);
|
||||||
|
throw new IllegalArgumentException(String.format("the number of genotypes is too large for ploidy %d and allele %d: approx. %.0f", ploidy, alleleCount, largeGenotypeCount));
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute the maximally acceptable allele count (ref allele included) given the maximally acceptable genotype count.
|
||||||
|
* @param ploidy sample ploidy
|
||||||
|
* @param maxGenotypeCount maximum number of genotype count used to calculate upper bound on number of alleles given ploidy
|
||||||
|
* @throws IllegalArgumentException if {@code ploidy} or {@code alleleCount} is negative.
|
||||||
|
* @return the maximally acceptable allele count given ploidy and maximum number of genotypes acceptable
|
||||||
|
*/
|
||||||
|
public static int computeMaxAcceptableAlleleCount(final int ploidy, final int maxGenotypeCount){
|
||||||
|
|
||||||
|
checkPloidyAndMaximumAllele(ploidy, ploidy); // a hack to check ploidy makes sense (could duplicate code but choice must be made)
|
||||||
|
|
||||||
|
final double log10MaxGenotypeCount = Math.log10(maxGenotypeCount);
|
||||||
|
|
||||||
|
// Math explanation: genotype count is determined by ${P+A-1 \choose A-1}$, this leads to constraint
|
||||||
|
// $\log(\frac{(P+A-1)!}{(A-1)!}) \le \log(P!G)$,
|
||||||
|
// where $P$ is ploidy, $A$ is allele count, and $G$ is maxGenotypeCount
|
||||||
|
// The upper and lower bounds of the left hand side of the constraint are $P \log(A-1+P)$ and $P \log(A)$
|
||||||
|
// which require $A$ to be searched in interval $[10^{\log(P!G)/P} - (P-1), 10^{\log(P!G)/P}]$
|
||||||
|
// Denote $[10^{\log(P!G)/P}$ as $x$ in the code.
|
||||||
|
|
||||||
|
final double x = Math.pow(10, (MathUtils.log10Factorial(ploidy) + log10MaxGenotypeCount)/ploidy );
|
||||||
|
final int lower = (int)Math.floor(x) - ploidy - 1;
|
||||||
|
final int upper = (int)Math.ceil(x);
|
||||||
|
for(int a=upper; a>=lower; --a){// check one by one
|
||||||
|
|
||||||
|
final double log10GTCnt = MathUtils.log10BinomialCoefficient(ploidy+a-1, a-1);
|
||||||
|
if(log10MaxGenotypeCount >= log10GTCnt) {
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw new GATKException("Code should never reach here.");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int calculateGenotypeCountUsingTables(int ploidy, int alleleCount) {
|
||||||
checkPloidyAndMaximumAllele(ploidy, alleleCount);
|
checkPloidyAndMaximumAllele(ploidy, alleleCount);
|
||||||
if (ploidy > maximumPloidy || alleleCount > maximumAllele)
|
if (ploidy > maximumPloidy || alleleCount > maximumAllele) {
|
||||||
ensureCapacity(alleleCount,ploidy);
|
ensureCapacity(alleleCount, ploidy);
|
||||||
|
}
|
||||||
return alleleFirstGenotypeOffsetByPloidy[ploidy][alleleCount];
|
return alleleFirstGenotypeOffsetByPloidy[ploidy][alleleCount];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -56,6 +56,8 @@ import com.google.java.contract.Ensures;
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import htsjdk.samtools.util.StringUtil;
|
import htsjdk.samtools.util.StringUtil;
|
||||||
import htsjdk.variant.variantcontext.*;
|
import htsjdk.variant.variantcontext.*;
|
||||||
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
|
import org.apache.commons.math3.stat.StatUtils;
|
||||||
import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection;
|
import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection;
|
||||||
import org.broadinstitute.gatk.utils.*;
|
import org.broadinstitute.gatk.utils.*;
|
||||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||||
|
|
@ -73,6 +75,7 @@ import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* {@link HaplotypeCaller}'s genotyping strategy implementation.
|
* {@link HaplotypeCaller}'s genotyping strategy implementation.
|
||||||
|
|
@ -84,6 +87,9 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
||||||
private static final String phase10 = "1|0";
|
private static final String phase10 = "1|0";
|
||||||
private static final int MAX_DROPPED_ALTERNATIVE_ALLELES_LOG_STRING_LENGTH = 500;
|
private static final int MAX_DROPPED_ALTERNATIVE_ALLELES_LOG_STRING_LENGTH = 500;
|
||||||
|
|
||||||
|
private final int MAX_GENOTYPE_COUNT_TO_ENUMERATE = configuration.genotypeArgs.MAX_GENOTYPE_COUNT;
|
||||||
|
private static final Map<Integer, Integer> practicaAlleleCountForPloidy = new HashMap<>();
|
||||||
|
|
||||||
private MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger;
|
private MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger;
|
||||||
|
|
||||||
protected final boolean doPhysicalPhasing;
|
protected final boolean doPhysicalPhasing;
|
||||||
|
|
@ -135,7 +141,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Carries the result of a call to #assignGenotypeLikelihoods
|
* Carries the result of a call to {@link #assignGenotypeLikelihoods(List, ReadLikelihoods, Map, byte[], GenomeLoc, GenomeLoc, GenomeLocParser, RefMetaDataTracker, List, boolean)}
|
||||||
*/
|
*/
|
||||||
public static class CalledHaplotypes {
|
public static class CalledHaplotypes {
|
||||||
private final List<VariantContext> calls;
|
private final List<VariantContext> calls;
|
||||||
|
|
@ -189,16 +195,16 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
||||||
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
|
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
// TODO - can this be refactored? this is hard to follow!
|
// TODO - can this be refactored? this is hard to follow!
|
||||||
CalledHaplotypes assignGenotypeLikelihoods( final List<Haplotype> haplotypes,
|
CalledHaplotypes assignGenotypeLikelihoods(final List<Haplotype> haplotypes,
|
||||||
final ReadLikelihoods<Haplotype> readLikelihoods,
|
final ReadLikelihoods<Haplotype> readLikelihoods,
|
||||||
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
|
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
|
||||||
final byte[] ref,
|
final byte[] ref,
|
||||||
final GenomeLoc refLoc,
|
final GenomeLoc refLoc,
|
||||||
final GenomeLoc activeRegionWindow,
|
final GenomeLoc activeRegionWindow,
|
||||||
final GenomeLocParser genomeLocParser,
|
final GenomeLocParser genomeLocParser,
|
||||||
final RefMetaDataTracker tracker,
|
final RefMetaDataTracker tracker,
|
||||||
final List<VariantContext> activeAllelesToGenotype,
|
final List<VariantContext> activeAllelesToGenotype,
|
||||||
final boolean emitReferenceConfidence) {
|
final boolean emitReferenceConfidence) {
|
||||||
// sanity check input arguments
|
// sanity check input arguments
|
||||||
if (haplotypes == null || haplotypes.isEmpty()) throw new IllegalArgumentException("haplotypes input should be non-empty and non-null, got "+haplotypes);
|
if (haplotypes == null || haplotypes.isEmpty()) throw new IllegalArgumentException("haplotypes input should be non-empty and non-null, got "+haplotypes);
|
||||||
if (readLikelihoods == null || readLikelihoods.sampleCount() == 0) throw new IllegalArgumentException("readLikelihoods input should be non-empty and non-null, got "+readLikelihoods);
|
if (readLikelihoods == null || readLikelihoods.sampleCount() == 0) throw new IllegalArgumentException("readLikelihoods input should be non-empty and non-null, got "+readLikelihoods);
|
||||||
|
|
@ -232,15 +238,17 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
||||||
|
|
||||||
// Merge the event to find a common reference representation
|
// Merge the event to find a common reference representation
|
||||||
|
|
||||||
VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList,
|
VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(eventsAtThisLoc,
|
||||||
GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
priorityList,
|
||||||
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
|
GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||||
|
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE,
|
||||||
|
false, false, null, false, false);
|
||||||
|
|
||||||
if( mergedVC == null )
|
if( mergedVC == null )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
final GenotypeLikelihoodsCalculationModel.Model calculationModel = mergedVC.isSNP()
|
final GenotypeLikelihoodsCalculationModel.Model calculationModel = mergedVC.isSNP() ? GenotypeLikelihoodsCalculationModel.Model.SNP
|
||||||
? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
: GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||||
|
|
||||||
final Map<VariantContext, Allele> mergeMap = new LinkedHashMap<>();
|
final Map<VariantContext, Allele> mergeMap = new LinkedHashMap<>();
|
||||||
mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele
|
mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele
|
||||||
|
|
@ -248,13 +256,37 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
||||||
mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function
|
mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function
|
||||||
}
|
}
|
||||||
|
|
||||||
final Map<Allele, List<Haplotype>> alleleMapper = createAlleleMapper(mergeMap, eventMapper);
|
|
||||||
|
|
||||||
if( configuration.DEBUG && logger != null ) {
|
if( configuration.DEBUG && logger != null ) {
|
||||||
if (logger != null) logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
|
if (logger != null) logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
|
||||||
}
|
}
|
||||||
|
|
||||||
final ReadLikelihoods<Allele> readAlleleLikelihoods = readLikelihoods.marginalize(alleleMapper, genomeLocParser.createPaddedGenomeLoc(genomeLocParser.createGenomeLoc(mergedVC), ALLELE_EXTENSION));
|
final Map<Allele, List<Haplotype>> alleleMapper = createAlleleMapper(mergeMap, eventMapper);
|
||||||
|
|
||||||
|
// if the number of allele is so high that enumerating all possible genotypes is impractical,
|
||||||
|
// as determined by MAX_GENOTYPE_COUNT_TO_ENUMERATE,
|
||||||
|
// trim alleles that are not well supported by good-scored haplotypes,
|
||||||
|
// otherwise alias, i.e. no trimming if genotype count is small
|
||||||
|
final int originalAlleleCount = alleleMapper.size();
|
||||||
|
Integer practicalAlleleCount = practicaAlleleCountForPloidy.get(ploidy);
|
||||||
|
if(practicalAlleleCount==null) { // maximum allele count given this ploidy and MAX_GENOTYPE_COUNT_TO_ENUMERATE hasn't been computed
|
||||||
|
practicalAlleleCount = GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(ploidy, MAX_GENOTYPE_COUNT_TO_ENUMERATE);
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<Allele, List<Haplotype>> practicalAlleleMapper = null;
|
||||||
|
if (practicalAlleleCount < originalAlleleCount) {
|
||||||
|
practicalAlleleMapper = reduceNumberOfAlternativeAllelesBasedOnHaplotypesScores(alleleMapper, practicalAlleleCount);
|
||||||
|
if( configuration.DEBUG && logger != null ) {
|
||||||
|
logger.warn(String.format("Removed alt alleles where ploidy is %d and original allele count is %d, whereas after trimming the allele count becomes %d",
|
||||||
|
ploidy, originalAlleleCount, practicalAlleleCount));
|
||||||
|
logger.warn(String.format("Alleles kept are:%s", practicalAlleleMapper.keySet()));
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
practicalAlleleMapper = alleleMapper;
|
||||||
|
}
|
||||||
|
|
||||||
|
final ReadLikelihoods<Allele> readAlleleLikelihoods = readLikelihoods.marginalize(practicalAlleleMapper,
|
||||||
|
genomeLocParser.createPaddedGenomeLoc(genomeLocParser.createGenomeLoc(mergedVC),
|
||||||
|
ALLELE_EXTENSION));
|
||||||
if (configuration.isSampleContaminationPresent())
|
if (configuration.isSampleContaminationPresent())
|
||||||
readAlleleLikelihoods.contaminationDownsampling(configuration.getSampleContamination());
|
readAlleleLikelihoods.contaminationDownsampling(configuration.getSampleContamination());
|
||||||
|
|
||||||
|
|
@ -269,10 +301,23 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
||||||
readAlleleLikelihoods.addNonReferenceAllele(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
readAlleleLikelihoods.addNonReferenceAllele(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||||
}
|
}
|
||||||
|
|
||||||
final GenotypesContext genotypes = calculateGLsForThisEvent(readAlleleLikelihoods, noCallAlleles );
|
final GenotypesContext genotypes = calculateGLsForThisEvent(readAlleleLikelihoods, noCallAlleles);
|
||||||
|
|
||||||
final VariantContext call = calculateGenotypes(new VariantContextBuilder(mergedVC).alleles(readAlleleLikelihoods.alleles()).genotypes(genotypes).make(), calculationModel);
|
final VariantContext call = calculateGenotypes(new VariantContextBuilder(mergedVC).alleles(readAlleleLikelihoods.alleles()).genotypes(genotypes).make(), calculationModel);
|
||||||
if ( call != null ) {
|
if ( call != null ) {
|
||||||
final VariantContext annotatedCall = annotateCall(readLikelihoods, perSampleFilteredReadList, ref, refLoc, genomeLocParser, tracker, emitReferenceConfidence, calledHaplotypes, mergedVC, alleleMapper, readAlleleLikelihoods, someAllelesWereDropped, call);
|
final VariantContext annotatedCall = annotateCall(readLikelihoods,
|
||||||
|
perSampleFilteredReadList,
|
||||||
|
ref,
|
||||||
|
refLoc,
|
||||||
|
genomeLocParser,
|
||||||
|
tracker,
|
||||||
|
emitReferenceConfidence,
|
||||||
|
calledHaplotypes,
|
||||||
|
mergedVC,
|
||||||
|
alleleMapper,
|
||||||
|
readAlleleLikelihoods,
|
||||||
|
someAllelesWereDropped,
|
||||||
|
call);
|
||||||
returnCalls.add( annotatedCall );
|
returnCalls.add( annotatedCall );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -300,7 +345,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
||||||
final boolean someAlternativeAllelesWereDropped = call.getAlleles().size() != initialAlleleNumber;
|
final boolean someAlternativeAllelesWereDropped = call.getAlleles().size() != initialAlleleNumber;
|
||||||
VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(referenceContext, tracker,readAlleleLikelihoodsForAnnotation, call, emitReferenceConfidence);
|
VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(referenceContext, tracker,readAlleleLikelihoodsForAnnotation, call, emitReferenceConfidence);
|
||||||
if (someAlternativeAllelesWereDropped || someAlternativeAllelesWereAlreadyDropped)
|
if (someAlternativeAllelesWereDropped || someAlternativeAllelesWereAlreadyDropped)
|
||||||
annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
|
annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
|
||||||
|
|
||||||
// maintain the set of all called haplotypes
|
// maintain the set of all called haplotypes
|
||||||
for ( final Allele calledAllele : call.getAlleles() ) {
|
for ( final Allele calledAllele : call.getAlleles() ) {
|
||||||
|
|
@ -312,6 +357,78 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
||||||
return !emitReferenceConfidence ? clearUnconfidentGenotypeCalls(annotatedCall) : annotatedCall;
|
return !emitReferenceConfidence ? clearUnconfidentGenotypeCalls(annotatedCall) : annotatedCall;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Trim excessive alt alleles when the combination of ploidy and alleles is so large that exhaustively enumerating
|
||||||
|
* all possible genotypes is impractical.
|
||||||
|
* Use the highest haplotype score of each allele to decide which ones to trim away, and in case of tie, use the second best.
|
||||||
|
* @param alleleMapper original allele to haplotype map to be trimmed
|
||||||
|
* @param desiredNumOfAlleles desired allele count (ref allele won't be removed)
|
||||||
|
* @return trimmed map of the desired size
|
||||||
|
*/
|
||||||
|
private Map<Allele, List<Haplotype>> reduceNumberOfAlternativeAllelesBasedOnHaplotypesScores(final Map<Allele, List<Haplotype>> alleleMapper, final int desiredNumOfAlleles) {
|
||||||
|
|
||||||
|
final PriorityQueue<AlleleScoredByHaplotypeScores> altAlleleMaxPriorityQ = new PriorityQueue<>((sa1, sa2) -> - sa2.compareTo(sa1)); // -1 to turn it into max priority q
|
||||||
|
|
||||||
|
final Set<Allele> allelesToRetain = new HashSet<>();
|
||||||
|
// populate allelePriorityQ with the relevant information
|
||||||
|
for(final Allele allele : alleleMapper.keySet()){
|
||||||
|
|
||||||
|
if(allele.isReference()){ // collect scores information only on alt alleles; ref allele is never trimmed by this function
|
||||||
|
allelesToRetain.add(allele);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
final List<Double> hapScores = alleleMapper.get(allele).stream().map(hap -> hap.getScore()).collect(Collectors.toList());
|
||||||
|
Collections.sort(hapScores);
|
||||||
|
final Double highestScore = hapScores.get(hapScores.size()-1);
|
||||||
|
final Double secondHighestScore = hapScores.size()>1 ? hapScores.get(hapScores.size()-2) : Double.NEGATIVE_INFINITY;
|
||||||
|
|
||||||
|
altAlleleMaxPriorityQ.add(new AlleleScoredByHaplotypeScores(allele, highestScore, secondHighestScore));
|
||||||
|
}
|
||||||
|
|
||||||
|
while(allelesToRetain.size()<desiredNumOfAlleles){
|
||||||
|
allelesToRetain.add(altAlleleMaxPriorityQ.poll().getAllele());
|
||||||
|
}
|
||||||
|
|
||||||
|
return alleleMapper.entrySet().stream().filter(p -> allelesToRetain.contains(p.getKey()))
|
||||||
|
.collect(Collectors.toMap(p->p.getKey(), p->p.getValue()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A utility class that provides ordering information, given best and second best haplotype scores.
|
||||||
|
* If there's a tie between the two alleles when comparing their best haplotype score, the second best haplotype score
|
||||||
|
* is used for breaking the tie. In the case that one allele doesn't have a second best allele, i.e. it has only one
|
||||||
|
* supportive haplotype, its second best score is set as null, and is always considered "worse" than another allele
|
||||||
|
* that has the same best haplotype score, but also has a second best haplotype score.
|
||||||
|
* TODO: in the extremely unlikely case that two alleles, having the same best haplotype score, neither have a second
|
||||||
|
* best haplotype score, the case is undecided.
|
||||||
|
*/
|
||||||
|
private static final class AlleleScoredByHaplotypeScores {
|
||||||
|
private final Allele allele;
|
||||||
|
private final Double bestHaplotypeScore;
|
||||||
|
private final Double secondBestHaplotypeScore;
|
||||||
|
|
||||||
|
public AlleleScoredByHaplotypeScores(final Allele allele, final Double bestHaplotypeScore, final Double secondBestHaplotypeScore){
|
||||||
|
this.allele = allele;
|
||||||
|
this.bestHaplotypeScore = bestHaplotypeScore;
|
||||||
|
this.secondBestHaplotypeScore = secondBestHaplotypeScore;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int compareTo(final AlleleScoredByHaplotypeScores other) {
|
||||||
|
if(bestHaplotypeScore > other.bestHaplotypeScore) {
|
||||||
|
return 1;
|
||||||
|
} else if (bestHaplotypeScore < other.bestHaplotypeScore) {
|
||||||
|
return -1;
|
||||||
|
} else {
|
||||||
|
return secondBestHaplotypeScore > other.secondBestHaplotypeScore ? 1 : -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Allele getAllele(){
|
||||||
|
return allele;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reduce the number alternative alleles in a read-likelihoods collection to the maximum-alt-allele user parameter value.
|
* Reduce the number alternative alleles in a read-likelihoods collection to the maximum-alt-allele user parameter value.
|
||||||
* <p>
|
* <p>
|
||||||
|
|
@ -462,7 +579,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
||||||
*/
|
*/
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
static Map<VariantContext, Set<Haplotype>> constructHaplotypeMapping(final List<VariantContext> originalCalls,
|
static Map<VariantContext, Set<Haplotype>> constructHaplotypeMapping(final List<VariantContext> originalCalls,
|
||||||
final Set<Haplotype> calledHaplotypes) {
|
final Set<Haplotype> calledHaplotypes) {
|
||||||
final Map<VariantContext, Set<Haplotype>> haplotypeMap = new HashMap<>(originalCalls.size());
|
final Map<VariantContext, Set<Haplotype>> haplotypeMap = new HashMap<>(originalCalls.size());
|
||||||
for ( final VariantContext call : originalCalls ) {
|
for ( final VariantContext call : originalCalls ) {
|
||||||
// don't try to phase if there is not exactly 1 alternate allele
|
// don't try to phase if there is not exactly 1 alternate allele
|
||||||
|
|
@ -674,14 +791,13 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
||||||
|
|
||||||
// Builds the read-likelihoods collection to use for annotation considering user arguments and the collection
|
// Builds the read-likelihoods collection to use for annotation considering user arguments and the collection
|
||||||
// used for genotyping.
|
// used for genotyping.
|
||||||
protected ReadLikelihoods<Allele> prepareReadAlleleLikelihoodsForAnnotation(
|
protected ReadLikelihoods<Allele> prepareReadAlleleLikelihoodsForAnnotation(final ReadLikelihoods<Haplotype> readHaplotypeLikelihoods,
|
||||||
final ReadLikelihoods<Haplotype> readHaplotypeLikelihoods,
|
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
|
||||||
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
|
final GenomeLocParser genomeLocParser,
|
||||||
final GenomeLocParser genomeLocParser,
|
final boolean emitReferenceConfidence,
|
||||||
final boolean emitReferenceConfidence,
|
final Map<Allele, List<Haplotype>> alleleMapper,
|
||||||
final Map<Allele, List<Haplotype>> alleleMapper,
|
final ReadLikelihoods<Allele> readAlleleLikelihoodsForGenotyping,
|
||||||
final ReadLikelihoods<Allele> readAlleleLikelihoodsForGenotyping,
|
final VariantContext call) {
|
||||||
final VariantContext call) {
|
|
||||||
|
|
||||||
final ReadLikelihoods<Allele> readAlleleLikelihoodsForAnnotations;
|
final ReadLikelihoods<Allele> readAlleleLikelihoodsForAnnotations;
|
||||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(call);
|
final GenomeLoc loc = genomeLocParser.createGenomeLoc(call);
|
||||||
|
|
@ -744,10 +860,10 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
||||||
* @return never {@code null} but perhaps an empty list if there is no variants to report.
|
* @return never {@code null} but perhaps an empty list if there is no variants to report.
|
||||||
*/
|
*/
|
||||||
protected TreeSet<Integer> decomposeHaplotypesIntoVariantContexts(final List<Haplotype> haplotypes,
|
protected TreeSet<Integer> decomposeHaplotypesIntoVariantContexts(final List<Haplotype> haplotypes,
|
||||||
final ReadLikelihoods readLikelihoods,
|
final ReadLikelihoods readLikelihoods,
|
||||||
final byte[] ref,
|
final byte[] ref,
|
||||||
final GenomeLoc refLoc,
|
final GenomeLoc refLoc,
|
||||||
final List<VariantContext> activeAllelesToGenotype) {
|
final List<VariantContext> activeAllelesToGenotype) {
|
||||||
final boolean in_GGA_mode = !activeAllelesToGenotype.isEmpty();
|
final boolean in_GGA_mode = !activeAllelesToGenotype.isEmpty();
|
||||||
|
|
||||||
// Using the cigar from each called haplotype figure out what events need to be written out in a VCF file
|
// Using the cigar from each called haplotype figure out what events need to be written out in a VCF file
|
||||||
|
|
@ -782,8 +898,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<VariantContext> getVCsAtThisLocation(final List<Haplotype> haplotypes,
|
protected List<VariantContext> getVCsAtThisLocation(final List<Haplotype> haplotypes,
|
||||||
final int loc,
|
final int loc,
|
||||||
final List<VariantContext> activeAllelesToGenotype) {
|
final List<VariantContext> activeAllelesToGenotype) {
|
||||||
// the overlapping events to merge into a common reference view
|
// the overlapping events to merge into a common reference view
|
||||||
final List<VariantContext> eventsAtThisLoc = new ArrayList<>();
|
final List<VariantContext> eventsAtThisLoc = new ArrayList<>();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -170,6 +170,15 @@ public class GenotypeLikelihoodCalculatorUnitTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testComputeMaxAcceptableAlleleCount(){
|
||||||
|
Assert.assertEquals(1024, GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(1, 1024));
|
||||||
|
Assert.assertEquals(44, GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(2, 1024));
|
||||||
|
Assert.assertEquals(17, GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(3, 1024));
|
||||||
|
Assert.assertEquals(5, GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(10, 1024));
|
||||||
|
Assert.assertEquals(3, GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(20, 1024));
|
||||||
|
Assert.assertEquals(2, GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(100, 1024));
|
||||||
|
}
|
||||||
// Simple inefficient calculation of the genotype count given the ploidy.
|
// Simple inefficient calculation of the genotype count given the ploidy.
|
||||||
private int calculateGenotypeCount(final int ploidy, final int alleleCount) {
|
private int calculateGenotypeCount(final int ploidy, final int alleleCount) {
|
||||||
if (ploidy == 0)
|
if (ploidy == 0)
|
||||||
|
|
|
||||||
|
|
@ -130,6 +130,7 @@ public class Haplotype extends Allele {
|
||||||
final Haplotype ret = new Haplotype(newBases, isReference());
|
final Haplotype ret = new Haplotype(newBases, isReference());
|
||||||
ret.setCigar(newCigar);
|
ret.setCigar(newCigar);
|
||||||
ret.setGenomeLocation(loc);
|
ret.setGenomeLocation(loc);
|
||||||
|
ret.setScore(score);
|
||||||
ret.setAlignmentStartHapwrtRef(newStart + getAlignmentStartHapwrtRef());
|
ret.setAlignmentStartHapwrtRef(newStart + getAlignmentStartHapwrtRef());
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue