Increase the default maxNumHaplotypesInPopulation from 13 to 25

-- A somewhat arbitrary increase that will need some evaluation, but it is necessary to get good results on the AFR integration test.
2013-03-26 21:00:04 -04:00 · 2013-03-26 21:00:04 -04:00 · 197d149495
parent 66910b036c
commit 197d149495
2 changed files with 19 additions and 4 deletions
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
@ -204,7 +204,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem

    @Advanced
    @Argument(fullName="maxNumHaplotypesInPopulation", shortName="maxNumHaplotypesInPopulation", doc="Maximum number of haplotypes to consider for your population. This number will probably need to be increased when calling organisms with high heterozygosity.", required = false)
-    protected int maxNumHaplotypesInPopulation = 13;
+    protected int maxNumHaplotypesInPopulation = 25;

    @Advanced
    @Argument(fullName="minKmer", shortName="minKmer", doc="Minimum kmer length to use in the assembly graph", required = false)
@ -557,8 +557,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
        final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList = splitReadsBySample( filteredReads );

        // subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes )
-        final List<Haplotype> bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ?
-                                                      likelihoodCalculationEngine.selectBestHaplotypes( haplotypes, stratifiedReadMap, maxNumHaplotypesInPopulation ) : haplotypes );
+        final List<Haplotype> bestHaplotypes = selectBestHaplotypesForGenotyping(haplotypes, stratifiedReadMap);

        final GenotypingEngine.CalledHaplotypes calledHaplotypes = genotypingEngine.assignGenotypeLikelihoods( UG_engine,
                bestHaplotypes,
@ -586,6 +585,22 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
        return 1; // One active region was processed during this map call
    }

+    /**
+     * Choose the haplotypes to genotype: in GENOTYPE_GIVEN_ALLELES mode the input list is returned unchanged;
+     * otherwise selection is delegated to likelihoodCalculationEngine with maxNumHaplotypesInPopulation as the limit argument
+     * @param haplotypes a list of haplotypes to consider
+     * @param stratifiedReadMap a map from samples -> read likelihoods
+     * @return the list of haplotypes to genotype (the same list instance as haplotypes in GENOTYPE_GIVEN_ALLELES mode)
+     */
+    protected List<Haplotype> selectBestHaplotypesForGenotyping(final List<Haplotype> haplotypes, final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap) {
+        // TODO -- skip this calculation if the list of haplotypes is of size 2 (as we'll always use 2 for genotyping)
+        if ( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
+            return haplotypes;
+        } else {
+            return likelihoodCalculationEngine.selectBestHaplotypesFromPooledLikelihoods(haplotypes, stratifiedReadMap, maxNumHaplotypesInPopulation);
+        }
+    }
+
    //---------------------------------------------------------------------------------------------------------------
    //
    // reduce
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
@ -231,7 +231,7 @@ public class LikelihoodCalculationEngine {

    @Requires({"haplotypes.size() > 0"})
    @Ensures({"result.size() <= haplotypes.size()"})
-    public List<Haplotype> selectBestHaplotypes( final List<Haplotype> haplotypes, final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap, final int maxNumHaplotypesInPopulation ) {
+    public List<Haplotype> selectBestHaplotypesFromPooledLikelihoods(final List<Haplotype> haplotypes, final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap, final int maxNumHaplotypesInPopulation) {

        final int numHaplotypes = haplotypes.size();
        final Set<String> sampleKeySet = stratifiedReadMap.keySet();