Increase the maxNumHaplotypesInPopulation to 25

-- A somewhat arbitrary increase (from 13) that will need some evaluation, but it is necessary to get good results on the AFR integration test.
This commit is contained in:
Mark DePristo 2013-03-26 21:00:04 -04:00
parent 66910b036c
commit 197d149495
2 changed files with 19 additions and 4 deletions

View File

@ -204,7 +204,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@Advanced
@Argument(fullName="maxNumHaplotypesInPopulation", shortName="maxNumHaplotypesInPopulation", doc="Maximum number of haplotypes to consider for your population. This number will probably need to be increased when calling organisms with high heterozygosity.", required = false)
protected int maxNumHaplotypesInPopulation = 13;
protected int maxNumHaplotypesInPopulation = 25;
@Advanced
@Argument(fullName="minKmer", shortName="minKmer", doc="Minimum kmer length to use in the assembly graph", required = false)
@ -557,8 +557,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList = splitReadsBySample( filteredReads );
// subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes )
final List<Haplotype> bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ?
likelihoodCalculationEngine.selectBestHaplotypes( haplotypes, stratifiedReadMap, maxNumHaplotypesInPopulation ) : haplotypes );
final List<Haplotype> bestHaplotypes = selectBestHaplotypesForGenotyping(haplotypes, stratifiedReadMap);
final GenotypingEngine.CalledHaplotypes calledHaplotypes = genotypingEngine.assignGenotypeLikelihoods( UG_engine,
bestHaplotypes,
@ -586,6 +585,22 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
return 1; // One active region was processed during this map call
}
/**
 * Choose the haplotypes that will be passed on for genotyping.
 *
 * In GENOTYPE_GIVEN_ALLELES mode the incoming list is returned as-is. In every other
 * genotyping mode the choice is delegated to the likelihood calculation engine's
 * pooled-likelihood selection, with maxNumHaplotypesInPopulation passed as the limit.
 *
 * @param haplotypes the candidate haplotypes to choose from
 * @param stratifiedReadMap a map from samples to their read likelihoods
 * @return the haplotypes that should be genotyped
 */
protected List<Haplotype> selectBestHaplotypesForGenotyping(final List<Haplotype> haplotypes, final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap) {
    // TODO -- skip this calculation if the list of haplotypes is of size 2 (as we'll always use 2 for genotyping)
    final boolean genotypingGivenAlleles =
            UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;

    // When the alleles are supplied by the user, every haplotype is kept.
    if ( genotypingGivenAlleles ) {
        return haplotypes;
    }

    return likelihoodCalculationEngine.selectBestHaplotypesFromPooledLikelihoods(haplotypes, stratifiedReadMap, maxNumHaplotypesInPopulation);
}
//---------------------------------------------------------------------------------------------------------------
//
// reduce

View File

@ -231,7 +231,7 @@ public class LikelihoodCalculationEngine {
@Requires({"haplotypes.size() > 0"})
@Ensures({"result.size() <= haplotypes.size()"})
public List<Haplotype> selectBestHaplotypes( final List<Haplotype> haplotypes, final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap, final int maxNumHaplotypesInPopulation ) {
public List<Haplotype> selectBestHaplotypesFromPooledLikelihoods(final List<Haplotype> haplotypes, final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap, final int maxNumHaplotypesInPopulation) {
final int numHaplotypes = haplotypes.size();
final Set<String> sampleKeySet = stratifiedReadMap.keySet();