diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index ca105fe03..c379b34dc 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -204,7 +204,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Advanced @Argument(fullName="maxNumHaplotypesInPopulation", shortName="maxNumHaplotypesInPopulation", doc="Maximum number of haplotypes to consider for your population. This number will probably need to be increased when calling organisms with high heterozygosity.", required = false) - protected int maxNumHaplotypesInPopulation = 13; + protected int maxNumHaplotypesInPopulation = 25; @Advanced @Argument(fullName="minKmer", shortName="minKmer", doc="Minimum kmer length to use in the assembly graph", required = false) @@ -557,8 +557,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem final Map> perSampleFilteredReadList = splitReadsBySample( filteredReads ); // subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes ) - final List bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? - likelihoodCalculationEngine.selectBestHaplotypes( haplotypes, stratifiedReadMap, maxNumHaplotypesInPopulation ) : haplotypes ); + final List bestHaplotypes = selectBestHaplotypesForGenotyping(haplotypes, stratifiedReadMap); final GenotypingEngine.CalledHaplotypes calledHaplotypes = genotypingEngine.assignGenotypeLikelihoods( UG_engine, bestHaplotypes, @@ -586,6 +585,22 @@ public class HaplotypeCaller extends ActiveRegionWalker implem return 1; // One active region was processed during this map call } + /** + * Select the best N haplotypes according to their likelihoods, if appropriate + * + * @param haplotypes a list of haplotypes to consider + * @param stratifiedReadMap a map from samples -> read likelihoods + * @return the list of haplotypes to genotype + */ + protected List selectBestHaplotypesForGenotyping(final List haplotypes, final Map stratifiedReadMap) { + // TODO -- skip this calculation if the list of haplotypes is of size 2 (as we'll always use 2 for genotyping) + if ( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { + return haplotypes; + } else { + return likelihoodCalculationEngine.selectBestHaplotypesFromPooledLikelihoods(haplotypes, stratifiedReadMap, maxNumHaplotypesInPopulation); + } + } + //--------------------------------------------------------------------------------------------------------------- // // reduce diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 51483c53f..5eaaba0dd 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -231,7 +231,7 @@ public class LikelihoodCalculationEngine { @Requires({"haplotypes.size() > 0"}) @Ensures({"result.size() <= haplotypes.size()"}) - public List selectBestHaplotypes( final List haplotypes, final Map stratifiedReadMap, final int maxNumHaplotypesInPopulation ) { + public List selectBestHaplotypesFromPooledLikelihoods(final List haplotypes, final Map stratifiedReadMap, final int maxNumHaplotypesInPopulation) { final int numHaplotypes = haplotypes.size(); final Set sampleKeySet = stratifiedReadMap.keySet();