Using PathComparatorTotalScore in the assembly graph traversal does a better job of capturing low-frequency branches that are inside high-frequency haplotypes.

This commit is contained in:
Ryan Poplin 2012-08-03 13:14:37 -04:00
parent 3dabb90eb0
commit ff80f17721
3 changed files with 10 additions and 10 deletions

View File

@ -415,7 +415,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
likelihoodCalculationEngine.computeReadLikelihoods( haplotypes, perSampleReadList );
// subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes )
final ArrayList<Haplotype> bestHaplotypes = haplotypes;// ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes );
final ArrayList<Haplotype> bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes );
for( final Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>> callResult :
( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES

View File

@ -82,11 +82,11 @@ public class KBestPaths {
}
}
protected static class PathComparatorLowestEdge implements Comparator<Path> {
public int compare(final Path path1, final Path path2) {
return path2.lowestEdge - path1.lowestEdge;
}
}
//protected static class PathComparatorLowestEdge implements Comparator<Path> {
// public int compare(final Path path1, final Path path2) {
// return path2.lowestEdge - path1.lowestEdge;
// }
//}
public static List<Path> getKBestPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int k ) {
if( k > MAX_PATHS_TO_HOLD/2 ) { throw new ReviewedStingException("Asked for more paths than MAX_PATHS_TO_HOLD!"); }
@ -99,7 +99,7 @@ public class KBestPaths {
}
}
Collections.sort(bestPaths, new PathComparatorLowestEdge() );
Collections.sort(bestPaths, new PathComparatorTotalScore() );
Collections.reverse(bestPaths);
return bestPaths.subList(0, Math.min(k, bestPaths.size()));
}
@ -114,8 +114,8 @@ public class KBestPaths {
if ( allOutgoingEdgesHaveBeenVisited(graph, path) ) {
if ( bestPaths.size() >= MAX_PATHS_TO_HOLD ) {
// clean out some low scoring paths
Collections.sort(bestPaths, new PathComparatorLowestEdge() );
for(int iii = 0; iii < 20; iii++) { bestPaths.remove(0); }
Collections.sort(bestPaths, new PathComparatorTotalScore() );
for(int iii = 0; iii < 20; iii++) { bestPaths.remove(0); } // BUGBUG: assumes MAX_PATHS_TO_HOLD >> 20
}
bestPaths.add(path);
} else if( n.val > 10000) {

View File

@ -311,7 +311,7 @@ public class LikelihoodCalculationEngine {
int hap1 = 0;
int hap2 = 0;
//double bestElement = Double.NEGATIVE_INFINITY;
final int maxChosenHaplotypes = Math.min( 9, sampleKeySet.size() * 2 + 1 );
final int maxChosenHaplotypes = Math.min( 13, sampleKeySet.size() * 2 + 1 );
while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) {
double maxElement = Double.NEGATIVE_INFINITY;
for( int iii = 0; iii < numHaplotypes; iii++ ) {