We no longer subset down to the best N haplotypes for the GL calculation.
I explain in comments within the code that this was causing problems with the marginalization over events.
This commit is contained in:
parent
c0e3874db0
commit
c0030f3f2d
|
|
@ -694,11 +694,14 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
||||||
//logger.info("Computing read likelihoods with " + assemblyResult.regionForGenotyping.size() + " reads");
|
//logger.info("Computing read likelihoods with " + assemblyResult.regionForGenotyping.size() + " reads");
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = likelihoodCalculationEngine.computeReadLikelihoods( assemblyResult.haplotypes, splitReadsBySample( assemblyResult.regionForGenotyping.getReads() ) );
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = likelihoodCalculationEngine.computeReadLikelihoods( assemblyResult.haplotypes, splitReadsBySample( assemblyResult.regionForGenotyping.getReads() ) );
|
||||||
|
|
||||||
// subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes )
|
// Note: we used to subset down at this point to only the "best" haplotypes in all samples for genotyping, but there
|
||||||
final List<Haplotype> bestHaplotypes = selectBestHaplotypesForGenotyping(assemblyResult.haplotypes, stratifiedReadMap);
|
// was a bad interaction between that selection and the marginalization that happens over each event when computing
|
||||||
|
// GLs. In particular, for samples that are heterozygous non-reference (B/C) the marginalization for B treats the
|
||||||
|
// haplotype containing C as reference (and vice versa). Now this is fine if all possible haplotypes are included
|
||||||
|
// in the genotyping, but we lose information if we select down to a few haplotypes. [EB]
|
||||||
|
|
||||||
final GenotypingEngine.CalledHaplotypes calledHaplotypes = genotypingEngine.assignGenotypeLikelihoods( UG_engine,
|
final GenotypingEngine.CalledHaplotypes calledHaplotypes = genotypingEngine.assignGenotypeLikelihoods( UG_engine,
|
||||||
bestHaplotypes,
|
assemblyResult.haplotypes,
|
||||||
stratifiedReadMap,
|
stratifiedReadMap,
|
||||||
perSampleFilteredReadList,
|
perSampleFilteredReadList,
|
||||||
assemblyResult.fullReferenceWithPadding,
|
assemblyResult.fullReferenceWithPadding,
|
||||||
|
|
@ -711,7 +714,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
||||||
// TODO -- must disable if we are doing NCT, or set the output type of ! presorted
|
// TODO -- must disable if we are doing NCT, or set the output type of ! presorted
|
||||||
if ( bamWriter != null ) {
|
if ( bamWriter != null ) {
|
||||||
haplotypeBAMWriter.writeReadsAlignedToHaplotypes(assemblyResult.haplotypes, assemblyResult.paddedReferenceLoc,
|
haplotypeBAMWriter.writeReadsAlignedToHaplotypes(assemblyResult.haplotypes, assemblyResult.paddedReferenceLoc,
|
||||||
bestHaplotypes,
|
assemblyResult.haplotypes,
|
||||||
calledHaplotypes.getCalledHaplotypes(),
|
calledHaplotypes.getCalledHaplotypes(),
|
||||||
stratifiedReadMap);
|
stratifiedReadMap);
|
||||||
}
|
}
|
||||||
|
|
@ -863,21 +866,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
||||||
return new AssemblyResult(trimmedHaplotypes, trimmedActiveRegion, fullReferenceWithPadding, paddedReferenceLoc, true);
|
return new AssemblyResult(trimmedHaplotypes, trimmedActiveRegion, fullReferenceWithPadding, paddedReferenceLoc, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Select the best N haplotypes according to their likelihoods, if appropriate
|
|
||||||
*
|
|
||||||
* @param haplotypes a list of haplotypes to consider
|
|
||||||
* @param stratifiedReadMap a map from samples -> read likelihoods
|
|
||||||
* @return the list of haplotypes to genotype
|
|
||||||
*/
|
|
||||||
protected List<Haplotype> selectBestHaplotypesForGenotyping(final List<Haplotype> haplotypes, final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap) {
|
|
||||||
if ( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
|
||||||
return haplotypes;
|
|
||||||
} else {
|
|
||||||
return likelihoodCalculationEngine.selectBestHaplotypesFromEachSample(haplotypes, stratifiedReadMap, maxNumHaplotypesInPopulation);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// reduce
|
// reduce
|
||||||
|
|
|
||||||
|
|
@ -215,7 +215,7 @@ public abstract class LocalAssemblyEngine {
|
||||||
returnHaplotypes.add(h);
|
returnHaplotypes.add(h);
|
||||||
|
|
||||||
if ( debug )
|
if ( debug )
|
||||||
logger.info("Adding haplotype " + h.getCigar() + " from debruijn graph with kmer " + graph.getKmerSize());
|
logger.info("Adding haplotype " + h.getCigar() + " from graph with kmer " + graph.getKmerSize());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue