diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/GenotypeCalculationArgumentCollection.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/GenotypeCalculationArgumentCollection.java index d8c10145f..4a5456d94 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/GenotypeCalculationArgumentCollection.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/arguments/GenotypeCalculationArgumentCollection.java @@ -122,12 +122,32 @@ public class GenotypeCalculationArgumentCollection implements Cloneable{ * scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend * that you not play around with this parameter. * - * As of GATK 2.2 the genotyper can handle a very large number of events, so the default maximum has been increased to 6. + * See also {@link #MAX_GENOTYPE_COUNT}. */ @Advanced @Argument(fullName = "max_alternate_alleles", shortName = MAX_ALTERNATE_ALLELES_SHORT_NAME, doc = "Maximum number of alternate alleles to genotype", required = false) public int MAX_ALTERNATE_ALLELES = 6; + /** + * If there are more than this number of genotypes at a locus presented to the genotyper, then only this many genotypes will be used. + * The possible genotypes are simply different ways of partitioning alleles given a specific ploidy asumption. + * Therefore, we remove genotypes from consideration by removing alternate alleles that are the least well supported. + * The estimate of allele support is based on the ranking of the candidate haplotypes coming out of the graph building step. + * Note that the reference allele is always kept. + * + * Note that genotyping sites with large genotype counts is both CPU and memory intensive. + * Unless there is a good reason to change the default value, we highly recommend that you not play around with this parameter. + * + * The maximum number of alternative alleles used in the genotyping step will be the lesser of the two: + * 1. the largest number of alt alleles, given ploidy, that yields a genotype count no higher than {@link #MAX_GENOTYPE_COUNT} + * 2. the value of {@link #MAX_ALTERNATE_ALLELES} + * + * See also {@link #MAX_ALTERNATE_ALLELES}. + */ + @Advanced + @Argument(fullName = "max_genotype_count", shortName = "maxGT", doc = "Maximum number of genotypes to consider at any site", required = false) + public int MAX_GENOTYPE_COUNT = 1024; + /** * Determines the maximum number of PL values that will be logged in the output. If the number of genotypes * (which is determined by the ploidy and the number of alleles) exceeds the value provided by this argument, diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypeLikelihoodCalculator.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypeLikelihoodCalculator.java index 1a3cdf2b8..aead2ab15 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypeLikelihoodCalculator.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypeLikelihoodCalculator.java @@ -163,7 +163,7 @@ public class GenotypeLikelihoodCalculator { *

This is in fact a shallow copy if {@link GenotypeLikelihoodCalculators#ploidyLog10}

and is not meant to be modified by * this class.

*/ - private final double[] log10; + private final double[] ploidyLog10; /** * Buffer field use as a temporal container for sorted allele counts when calculating the likelihood of a @@ -202,24 +202,22 @@ public class GenotypeLikelihoodCalculator { * Creates a new calculator providing its ploidy and number of genotyping alleles. */ protected GenotypeLikelihoodCalculator(final int ploidy, final int alleleCount, - final int[][] alleleFirstGenotypeOffsetByPloidy, - final GenotypeAlleleCounts[][] genotypeTableByPloidy, - final double[] ploidyLog10) { + final int[][] alleleFirstGenotypeOffsetByPloidy, + final GenotypeAlleleCounts[][] genotypeTableByPloidy, + final double[] ploidyLog10) { + this.alleleFirstGenotypeOffsetByPloidy = alleleFirstGenotypeOffsetByPloidy; genotypeAlleleCounts = genotypeTableByPloidy[ploidy]; this.alleleCount = alleleCount; this.ploidy = ploidy; genotypeCount = this.alleleFirstGenotypeOffsetByPloidy[ploidy][alleleCount]; - if (genotypeCount == GenotypeLikelihoodCalculators.GENOTYPE_COUNT_OVERFLOW) - throw new IllegalArgumentException( - String.format("the combination of ploidy (%s) and number of alleles (%s) results in a very large number of genotypes (> %s). You need to limit ploidy or the number of alternative alleles to analyze this locus", - ploidy,alleleCount,Integer.MAX_VALUE)); + alleleHeap = new IntMaxHeap(ploidy); readLikelihoodsByGenotypeIndex = new double[genotypeCount][]; - log10 = ploidyLog10; + this.ploidyLog10 = ploidyLog10; // The number of possible components is limited by distinct allele count and ploidy. maximumDistinctAllelesInGenotype = Math.min(ploidy, alleleCount); - genotypeAllelesAndCounts = new int[maximumDistinctAllelesInGenotype << 1]; + genotypeAllelesAndCounts = new int[maximumDistinctAllelesInGenotype*2]; } /** @@ -349,7 +347,7 @@ public class GenotypeLikelihoodCalculator { */ private double[] genotypeLikelihoods(final double[][] readLikelihoodsByGenotypeIndex, final int readCount) { final double[] result = new double[genotypeCount]; - final double denominator = readCount * log10[ploidy]; // instead of dividing each read likelihood by ploidy + final double denominator = readCount * ploidyLog10[ploidy]; // instead of dividing each read likelihood by ploidy // ( so subtract log10(ploidy) ) we multiply them all and the divide by ploidy^readCount (so substract readCount * log10(ploidy) ) for (int g = 0; g < genotypeCount; g++) { final double[] likelihoodsByRead = readLikelihoodsByGenotypeIndex[g]; @@ -464,7 +462,9 @@ public class GenotypeLikelihoodCalculator { * exactly one allele present in the genotype. */ private void singleComponentGenotypeLikelihoodByRead(final GenotypeAlleleCounts genotypeAlleleCounts, - final double[] likelihoodByRead, final double[] readLikelihoodComponentsByAlleleCount, final int readCount) { + final double[] likelihoodByRead, + final double[] readLikelihoodComponentsByAlleleCount, + final int readCount) { final int allele = genotypeAlleleCounts.alleleIndexAt(0); // the count of the only component must be = ploidy. int offset = (allele * (ploidy + 1) + ploidy) * readCount; @@ -493,7 +493,7 @@ public class GenotypeLikelihoodCalculator { // p = 2 because the frequency == 1 we already have it. for (int frequency = 2, destinationOffset = frequency1Offset + readCount; frequency <= ploidy; frequency++) { - final double log10frequency = log10[frequency]; + final double log10frequency = ploidyLog10[frequency]; for (int r = 0, sourceOffset = frequency1Offset; r < readCount; r++) readAlleleLikelihoodByAlleleCount[destinationOffset++] = readAlleleLikelihoodByAlleleCount[sourceOffset++] + log10frequency; @@ -620,7 +620,11 @@ public class GenotypeLikelihoodCalculator { * @param destination where to store the new genotype index mapping to old. * @param sortedAlleleCountsBuffer a buffer to re-use to get the genotype-allele-count's sorted allele counts. */ - private void genotypeIndexMapPerGenotypeIndex(final int newGenotypeIndex, final GenotypeAlleleCounts alleleCounts, final int[] oldToNewAlleleIndexMap, final int[] destination, final int[] sortedAlleleCountsBuffer) { + private void genotypeIndexMapPerGenotypeIndex(final int newGenotypeIndex, + final GenotypeAlleleCounts alleleCounts, + final int[] oldToNewAlleleIndexMap, + final int[] destination, + final int[] sortedAlleleCountsBuffer) { final int distinctAlleleCount = alleleCounts.distinctAlleleCount(); alleleCounts.copyAlleleCounts(sortedAlleleCountsBuffer,0); for (int j = 0, jj = 0; j < distinctAlleleCount; j++) { diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypeLikelihoodCalculators.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypeLikelihoodCalculators.java index 83b280bff..da0ff0975 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypeLikelihoodCalculators.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypeLikelihoodCalculators.java @@ -51,7 +51,11 @@ package org.broadinstitute.gatk.tools.walkers.genotyper; +import org.broadinstitute.gatk.utils.MathUtils; +import org.broadinstitute.gatk.utils.exceptions.GATKException; + import java.util.Arrays; +import java.util.stream.IntStream; /** * Genotype likelihood calculator utility. @@ -116,7 +120,10 @@ public class GenotypeLikelihoodCalculators { private volatile static GenotypeAlleleCounts[][] genotypeTableByPloidy = buildGenotypeAlleleCountsTable(maximumPloidy,maximumAllele,alleleFirstGenotypeOffsetByPloidy); - + /** + * Cached log10 values for the first integer up to the maximum ploidy requested thus far. + */ + private volatile static double[] ploidyLog10 = IntStream.range(0, maximumPloidy + 1).mapToDouble(Math::log10).toArray(); /** * Build the table with the genotype offsets based on ploidy and the maximum allele index with representation @@ -291,40 +298,29 @@ public class GenotypeLikelihoodCalculators { return result; } - /** - * Cached log10 values for the first integer up to the maximum ploidy requested thus far. - */ - private volatile static double[] ploidyLog10; - - // Initialize {@link #ploidyLog10}. - static { - ploidyLog10 = new double[maximumPloidy + 1]; - for (int i = 0; i <= maximumPloidy; i++) - ploidyLog10[i] = Math.log10(i); - } - /** * Returns an instance given its ploidy and the number of alleles. * * @param alleleCount the required allele-count. * @param ploidy the required ploidy-count. * - * @throws IllegalArgumentException if either {@code ploidy} or {@code alleleCount} is {@code null}, or - * the resulting number of genotypes is too large. + * @throws IllegalArgumentException if either {@code ploidy} or {@code alleleCount} is negative, or the resulting number of genotypes is too large. * * @return never {@code null}. */ - public static GenotypeLikelihoodCalculator getInstance(final int ploidy, - final int alleleCount) { + public static GenotypeLikelihoodCalculator getInstance(final int ploidy, final int alleleCount) { checkPloidyAndMaximumAllele(ploidy, alleleCount); // Non-thread safe (fast) check on tables capacities, - // if not enough capacity we expand the tables in a thread-safe manner: - if (alleleCount > maximumAllele || ploidy > maximumPloidy) - ensureCapacity(alleleCount, ploidy); + // if not enough capacity we expand the tables in a thread-safe manner + // also checks if the requested ploidy and allele count result in a genotype count too large to deal with + if(calculateGenotypeCountUsingTables(ploidy, alleleCount) == GENOTYPE_COUNT_OVERFLOW){ + final double largeGenotypeCount = MathUtils.binomialCoefficient(ploidy + alleleCount - 1, alleleCount - 1); + throw new IllegalArgumentException(String.format("the number of genotypes is too large for ploidy %d and allele %d: approx. %.0f", ploidy, alleleCount, largeGenotypeCount)); + } // At this point the tables must have at least the requested capacity, likely to be much more. - return new GenotypeLikelihoodCalculator(ploidy,alleleCount,alleleFirstGenotypeOffsetByPloidy,genotypeTableByPloidy,ploidyLog10); + return new GenotypeLikelihoodCalculator(ploidy, alleleCount, alleleFirstGenotypeOffsetByPloidy, genotypeTableByPloidy, ploidyLog10); } /** @@ -413,14 +409,59 @@ public class GenotypeLikelihoodCalculators { * @param ploidy the requested ploidy. * @param alleleCount the requested number of alleles. * - * @throws IllegalArgumentException if {@code ploidy} or {@code alleleCount} is negative. + * @throws IllegalArgumentException if {@code ploidy} or {@code alleleCount} is negative or + * the number of genotypes is too large (more than {@link Integer#MAX_VALUE}). * - * @return 0 or greater. + * @return the number of genotypes given ploidy and allele count (0 or greater). */ public final static int genotypeCount(final int ploidy, final int alleleCount) { + + final int result = calculateGenotypeCountUsingTables(ploidy, alleleCount); + if (result == GENOTYPE_COUNT_OVERFLOW) { + final double largeGenotypeCount = MathUtils.binomialCoefficient(ploidy + alleleCount - 1, alleleCount - 1); + throw new IllegalArgumentException(String.format("the number of genotypes is too large for ploidy %d and allele %d: approx. %.0f", ploidy, alleleCount, largeGenotypeCount)); + } + return result; + } + + /** + * Compute the maximally acceptable allele count (ref allele included) given the maximally acceptable genotype count. + * @param ploidy sample ploidy + * @param maxGenotypeCount maximum number of genotype count used to calculate upper bound on number of alleles given ploidy + * @throws IllegalArgumentException if {@code ploidy} or {@code alleleCount} is negative. + * @return the maximally acceptable allele count given ploidy and maximum number of genotypes acceptable + */ + public static int computeMaxAcceptableAlleleCount(final int ploidy, final int maxGenotypeCount){ + + checkPloidyAndMaximumAllele(ploidy, ploidy); // a hack to check ploidy makes sense (could duplicate code but choice must be made) + + final double log10MaxGenotypeCount = Math.log10(maxGenotypeCount); + + // Math explanation: genotype count is determined by ${P+A-1 \choose A-1}$, this leads to constraint + // $\log(\frac{(P+A-1)!}{(A-1)!}) \le \log(P!G)$, + // where $P$ is ploidy, $A$ is allele count, and $G$ is maxGenotypeCount + // The upper and lower bounds of the left hand side of the constraint are $P \log(A-1+P)$ and $P \log(A)$ + // which require $A$ to be searched in interval $[10^{\log(P!G)/P} - (P-1), 10^{\log(P!G)/P}]$ + // Denote $[10^{\log(P!G)/P}$ as $x$ in the code. + + final double x = Math.pow(10, (MathUtils.log10Factorial(ploidy) + log10MaxGenotypeCount)/ploidy ); + final int lower = (int)Math.floor(x) - ploidy - 1; + final int upper = (int)Math.ceil(x); + for(int a=upper; a>=lower; --a){// check one by one + + final double log10GTCnt = MathUtils.log10BinomialCoefficient(ploidy+a-1, a-1); + if(log10MaxGenotypeCount >= log10GTCnt) { + return a; + } + } + throw new GATKException("Code should never reach here."); + } + + private static int calculateGenotypeCountUsingTables(int ploidy, int alleleCount) { checkPloidyAndMaximumAllele(ploidy, alleleCount); - if (ploidy > maximumPloidy || alleleCount > maximumAllele) - ensureCapacity(alleleCount,ploidy); + if (ploidy > maximumPloidy || alleleCount > maximumAllele) { + ensureCapacity(alleleCount, ploidy); + } return alleleFirstGenotypeOffsetByPloidy[ploidy][alleleCount]; } } \ No newline at end of file diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java index 30e7efe22..ad0f2b773 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java @@ -56,6 +56,8 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import htsjdk.samtools.util.StringUtil; import htsjdk.variant.variantcontext.*; +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.math3.stat.StatUtils; import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection; import org.broadinstitute.gatk.utils.*; import org.broadinstitute.gatk.utils.contexts.ReferenceContext; @@ -73,6 +75,7 @@ import org.broadinstitute.gatk.utils.variant.GATKVCFConstants; import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; import java.util.*; +import java.util.stream.Collectors; /** * {@link HaplotypeCaller}'s genotyping strategy implementation. @@ -84,6 +87,9 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine practicaAlleleCountForPloidy = new HashMap<>(); + private MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger; protected final boolean doPhysicalPhasing; @@ -135,7 +141,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine calls; @@ -189,16 +195,16 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine 0"}) @Ensures("result != null") // TODO - can this be refactored? this is hard to follow! - CalledHaplotypes assignGenotypeLikelihoods( final List haplotypes, - final ReadLikelihoods readLikelihoods, - final Map> perSampleFilteredReadList, - final byte[] ref, - final GenomeLoc refLoc, - final GenomeLoc activeRegionWindow, - final GenomeLocParser genomeLocParser, - final RefMetaDataTracker tracker, - final List activeAllelesToGenotype, - final boolean emitReferenceConfidence) { + CalledHaplotypes assignGenotypeLikelihoods(final List haplotypes, + final ReadLikelihoods readLikelihoods, + final Map> perSampleFilteredReadList, + final byte[] ref, + final GenomeLoc refLoc, + final GenomeLoc activeRegionWindow, + final GenomeLocParser genomeLocParser, + final RefMetaDataTracker tracker, + final List activeAllelesToGenotype, + final boolean emitReferenceConfidence) { // sanity check input arguments if (haplotypes == null || haplotypes.isEmpty()) throw new IllegalArgumentException("haplotypes input should be non-empty and non-null, got "+haplotypes); if (readLikelihoods == null || readLikelihoods.sampleCount() == 0) throw new IllegalArgumentException("readLikelihoods input should be non-empty and non-null, got "+readLikelihoods); @@ -232,15 +238,17 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine mergeMap = new LinkedHashMap<>(); mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele @@ -248,13 +256,37 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine> alleleMapper = createAlleleMapper(mergeMap, eventMapper); - if( configuration.DEBUG && logger != null ) { if (logger != null) logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles()); } - final ReadLikelihoods readAlleleLikelihoods = readLikelihoods.marginalize(alleleMapper, genomeLocParser.createPaddedGenomeLoc(genomeLocParser.createGenomeLoc(mergedVC), ALLELE_EXTENSION)); + final Map> alleleMapper = createAlleleMapper(mergeMap, eventMapper); + + // if the number of allele is so high that enumerating all possible genotypes is impractical, + // as determined by MAX_GENOTYPE_COUNT_TO_ENUMERATE, + // trim alleles that are not well supported by good-scored haplotypes, + // otherwise alias, i.e. no trimming if genotype count is small + final int originalAlleleCount = alleleMapper.size(); + Integer practicalAlleleCount = practicaAlleleCountForPloidy.get(ploidy); + if(practicalAlleleCount==null) { // maximum allele count given this ploidy and MAX_GENOTYPE_COUNT_TO_ENUMERATE hasn't been computed + practicalAlleleCount = GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(ploidy, MAX_GENOTYPE_COUNT_TO_ENUMERATE); + } + + Map> practicalAlleleMapper = null; + if (practicalAlleleCount < originalAlleleCount) { + practicalAlleleMapper = reduceNumberOfAlternativeAllelesBasedOnHaplotypesScores(alleleMapper, practicalAlleleCount); + if( configuration.DEBUG && logger != null ) { + logger.warn(String.format("Removed alt alleles where ploidy is %d and original allele count is %d, whereas after trimming the allele count becomes %d", + ploidy, originalAlleleCount, practicalAlleleCount)); + logger.warn(String.format("Alleles kept are:%s", practicalAlleleMapper.keySet())); + } + }else{ + practicalAlleleMapper = alleleMapper; + } + + final ReadLikelihoods readAlleleLikelihoods = readLikelihoods.marginalize(practicalAlleleMapper, + genomeLocParser.createPaddedGenomeLoc(genomeLocParser.createGenomeLoc(mergedVC), + ALLELE_EXTENSION)); if (configuration.isSampleContaminationPresent()) readAlleleLikelihoods.contaminationDownsampling(configuration.getSampleContamination()); @@ -269,10 +301,23 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine> reduceNumberOfAlternativeAllelesBasedOnHaplotypesScores(final Map> alleleMapper, final int desiredNumOfAlleles) { + + final PriorityQueue altAlleleMaxPriorityQ = new PriorityQueue<>((sa1, sa2) -> - sa2.compareTo(sa1)); // -1 to turn it into max priority q + + final Set allelesToRetain = new HashSet<>(); + // populate allelePriorityQ with the relevant information + for(final Allele allele : alleleMapper.keySet()){ + + if(allele.isReference()){ // collect scores information only on alt alleles; ref allele is never trimmed by this function + allelesToRetain.add(allele); + continue; + } + + final List hapScores = alleleMapper.get(allele).stream().map(hap -> hap.getScore()).collect(Collectors.toList()); + Collections.sort(hapScores); + final Double highestScore = hapScores.get(hapScores.size()-1); + final Double secondHighestScore = hapScores.size()>1 ? hapScores.get(hapScores.size()-2) : Double.NEGATIVE_INFINITY; + + altAlleleMaxPriorityQ.add(new AlleleScoredByHaplotypeScores(allele, highestScore, secondHighestScore)); + } + + while(allelesToRetain.size() allelesToRetain.contains(p.getKey())) + .collect(Collectors.toMap(p->p.getKey(), p->p.getValue())); + } + + /** + * A utility class that provides ordering information, given best and second best haplotype scores. + * If there's a tie between the two alleles when comparing their best haplotype score, the second best haplotype score + * is used for breaking the tie. In the case that one allele doesn't have a second best allele, i.e. it has only one + * supportive haplotype, its second best score is set as null, and is always considered "worse" than another allele + * that has the same best haplotype score, but also has a second best haplotype score. + * TODO: in the extremely unlikely case that two alleles, having the same best haplotype score, neither have a second + * best haplotype score, the case is undecided. + */ + private static final class AlleleScoredByHaplotypeScores { + private final Allele allele; + private final Double bestHaplotypeScore; + private final Double secondBestHaplotypeScore; + + public AlleleScoredByHaplotypeScores(final Allele allele, final Double bestHaplotypeScore, final Double secondBestHaplotypeScore){ + this.allele = allele; + this.bestHaplotypeScore = bestHaplotypeScore; + this.secondBestHaplotypeScore = secondBestHaplotypeScore; + } + + public int compareTo(final AlleleScoredByHaplotypeScores other) { + if(bestHaplotypeScore > other.bestHaplotypeScore) { + return 1; + } else if (bestHaplotypeScore < other.bestHaplotypeScore) { + return -1; + } else { + return secondBestHaplotypeScore > other.secondBestHaplotypeScore ? 1 : -1; + } + } + + public Allele getAllele(){ + return allele; + } + } + /** * Reduce the number alternative alleles in a read-likelihoods collection to the maximum-alt-allele user parameter value. *

@@ -462,7 +579,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine> constructHaplotypeMapping(final List originalCalls, - final Set calledHaplotypes) { + final Set calledHaplotypes) { final Map> haplotypeMap = new HashMap<>(originalCalls.size()); for ( final VariantContext call : originalCalls ) { // don't try to phase if there is not exactly 1 alternate allele @@ -674,14 +791,13 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine prepareReadAlleleLikelihoodsForAnnotation( - final ReadLikelihoods readHaplotypeLikelihoods, - final Map> perSampleFilteredReadList, - final GenomeLocParser genomeLocParser, - final boolean emitReferenceConfidence, - final Map> alleleMapper, - final ReadLikelihoods readAlleleLikelihoodsForGenotyping, - final VariantContext call) { + protected ReadLikelihoods prepareReadAlleleLikelihoodsForAnnotation(final ReadLikelihoods readHaplotypeLikelihoods, + final Map> perSampleFilteredReadList, + final GenomeLocParser genomeLocParser, + final boolean emitReferenceConfidence, + final Map> alleleMapper, + final ReadLikelihoods readAlleleLikelihoodsForGenotyping, + final VariantContext call) { final ReadLikelihoods readAlleleLikelihoodsForAnnotations; final GenomeLoc loc = genomeLocParser.createGenomeLoc(call); @@ -744,10 +860,10 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine decomposeHaplotypesIntoVariantContexts(final List haplotypes, - final ReadLikelihoods readLikelihoods, - final byte[] ref, - final GenomeLoc refLoc, - final List activeAllelesToGenotype) { + final ReadLikelihoods readLikelihoods, + final byte[] ref, + final GenomeLoc refLoc, + final List activeAllelesToGenotype) { final boolean in_GGA_mode = !activeAllelesToGenotype.isEmpty(); // Using the cigar from each called haplotype figure out what events need to be written out in a VCF file @@ -782,8 +898,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine getVCsAtThisLocation(final List haplotypes, - final int loc, - final List activeAllelesToGenotype) { + final int loc, + final List activeAllelesToGenotype) { // the overlapping events to merge into a common reference view final List eventsAtThisLoc = new ArrayList<>(); diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypeLikelihoodCalculatorUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypeLikelihoodCalculatorUnitTest.java index e7a9c9467..27c7ac04e 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypeLikelihoodCalculatorUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/GenotypeLikelihoodCalculatorUnitTest.java @@ -170,6 +170,15 @@ public class GenotypeLikelihoodCalculatorUnitTest { } + @Test + public void testComputeMaxAcceptableAlleleCount(){ + Assert.assertEquals(1024, GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(1, 1024)); + Assert.assertEquals(44, GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(2, 1024)); + Assert.assertEquals(17, GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(3, 1024)); + Assert.assertEquals(5, GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(10, 1024)); + Assert.assertEquals(3, GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(20, 1024)); + Assert.assertEquals(2, GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(100, 1024)); + } // Simple inefficient calculation of the genotype count given the ploidy. private int calculateGenotypeCount(final int ploidy, final int alleleCount) { if (ploidy == 0) diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/haplotype/Haplotype.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/haplotype/Haplotype.java index 28df151c9..59459b027 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/haplotype/Haplotype.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/haplotype/Haplotype.java @@ -130,6 +130,7 @@ public class Haplotype extends Allele { final Haplotype ret = new Haplotype(newBases, isReference()); ret.setCigar(newCigar); ret.setGenomeLocation(loc); + ret.setScore(score); ret.setAlignmentStartHapwrtRef(newStart + getAlignmentStartHapwrtRef()); return ret; }