The GenotypingEngine now uses the samples from the mapping of samples -> PerReadAlleleLikelihoodMap instead of passing around a redundant list of samples
This commit is contained in:
parent
0310499b65
commit
67cd407854
|
|
@@ -119,9 +119,10 @@ public class GenotypingEngine {
|
||||||
* Main entry point of class - given a particular set of haplotypes, samples and reference context, compute
|
* Main entry point of class - given a particular set of haplotypes, samples and reference context, compute
|
||||||
* genotype likelihoods and assemble into a list of variant contexts and genomic events ready for calling
|
* genotype likelihoods and assemble into a list of variant contexts and genomic events ready for calling
|
||||||
*
|
*
|
||||||
|
* The list of samples we're working with is obtained from the haplotypeReadMap
|
||||||
|
*
|
||||||
* @param UG_engine UG Engine with basic input parameters
|
* @param UG_engine UG Engine with basic input parameters
|
||||||
* @param haplotypes Haplotypes to assign likelihoods to
|
* @param haplotypes Haplotypes to assign likelihoods to
|
||||||
* @param samples Samples to genotype
|
|
||||||
* @param haplotypeReadMap Map from reads->(haplotypes,likelihoods)
|
* @param haplotypeReadMap Map from reads->(haplotypes,likelihoods)
|
||||||
* @param perSampleFilteredReadList
|
* @param perSampleFilteredReadList
|
||||||
* @param ref Reference bytes at active region
|
* @param ref Reference bytes at active region
|
||||||
|
|
@@ -136,7 +137,6 @@ public class GenotypingEngine {
|
||||||
// TODO - can this be refactored? this is hard to follow!
|
// TODO - can this be refactored? this is hard to follow!
|
||||||
public CalledHaplotypes assignGenotypeLikelihoods( final UnifiedGenotyperEngine UG_engine,
|
public CalledHaplotypes assignGenotypeLikelihoods( final UnifiedGenotyperEngine UG_engine,
|
||||||
final List<Haplotype> haplotypes,
|
final List<Haplotype> haplotypes,
|
||||||
final List<String> samples,
|
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap,
|
final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap,
|
||||||
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
|
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
|
||||||
final byte[] ref,
|
final byte[] ref,
|
||||||
|
|
@@ -147,7 +147,6 @@ public class GenotypingEngine {
|
||||||
// sanity check input arguments
|
// sanity check input arguments
|
||||||
if (UG_engine == null) throw new IllegalArgumentException("UG_Engine input can't be null, got "+UG_engine);
|
if (UG_engine == null) throw new IllegalArgumentException("UG_Engine input can't be null, got "+UG_engine);
|
||||||
if (haplotypes == null || haplotypes.isEmpty()) throw new IllegalArgumentException("haplotypes input should be non-empty and non-null, got "+haplotypes);
|
if (haplotypes == null || haplotypes.isEmpty()) throw new IllegalArgumentException("haplotypes input should be non-empty and non-null, got "+haplotypes);
|
||||||
if (samples == null || samples.isEmpty()) throw new IllegalArgumentException("samples input must be non-empty and non-null, got "+samples);
|
|
||||||
if (haplotypeReadMap == null || haplotypeReadMap.isEmpty()) throw new IllegalArgumentException("haplotypeReadMap input should be non-empty and non-null, got "+haplotypeReadMap);
|
if (haplotypeReadMap == null || haplotypeReadMap.isEmpty()) throw new IllegalArgumentException("haplotypeReadMap input should be non-empty and non-null, got "+haplotypeReadMap);
|
||||||
if (ref == null || ref.length == 0 ) throw new IllegalArgumentException("ref bytes input should be non-empty and non-null, got "+ref);
|
if (ref == null || ref.length == 0 ) throw new IllegalArgumentException("ref bytes input should be non-empty and non-null, got "+ref);
|
||||||
if (refLoc == null || refLoc.getStop()-refLoc.getStart()+1 != ref.length) throw new IllegalArgumentException(" refLoc must be non-null and length must match ref bytes, got "+refLoc);
|
if (refLoc == null || refLoc.getStop()-refLoc.getStart()+1 != ref.length) throw new IllegalArgumentException(" refLoc must be non-null and length must match ref bytes, got "+refLoc);
|
||||||
|
|
@@ -157,7 +156,7 @@ public class GenotypingEngine {
|
||||||
|
|
||||||
// update the haplotypes so we're ready to call, getting the ordered list of positions on the reference
|
// update the haplotypes so we're ready to call, getting the ordered list of positions on the reference
|
||||||
// that carry events among the haplotypes
|
// that carry events among the haplotypes
|
||||||
final TreeSet<Integer> startPosKeySet = decomposeHaplotypesIntoVariantContexts(haplotypes, samples, haplotypeReadMap, ref, refLoc, activeAllelesToGenotype);
|
final TreeSet<Integer> startPosKeySet = decomposeHaplotypesIntoVariantContexts(haplotypes, haplotypeReadMap, ref, refLoc, activeAllelesToGenotype);
|
||||||
|
|
||||||
// Walk along each position in the key set and create each event to be outputted
|
// Walk along each position in the key set and create each event to be outputted
|
||||||
final Set<Haplotype> calledHaplotypes = new HashSet<Haplotype>();
|
final Set<Haplotype> calledHaplotypes = new HashSet<Haplotype>();
|
||||||
|
|
@@ -195,7 +194,7 @@ public class GenotypingEngine {
|
||||||
|
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().CONTAMINATION_FRACTION, UG_engine.getUAC().contaminationLog );
|
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().CONTAMINATION_FRACTION, UG_engine.getUAC().contaminationLog );
|
||||||
|
|
||||||
final GenotypesContext genotypes = calculateGLsForThisEvent( samples, alleleReadMap, mergedVC );
|
final GenotypesContext genotypes = calculateGLsForThisEvent( alleleReadMap, mergedVC );
|
||||||
final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), mergedVC.isSNP() ? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), mergedVC.isSNP() ? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||||
if( call != null ) {
|
if( call != null ) {
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap_annotations = ( USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap :
|
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap_annotations = ( USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap :
|
||||||
|
|
@@ -224,7 +223,6 @@ public class GenotypingEngine {
|
||||||
* Go through the haplotypes we assembled, and decompose them into their constituent variant contexts
|
* Go through the haplotypes we assembled, and decompose them into their constituent variant contexts
|
||||||
*
|
*
|
||||||
* @param haplotypes the list of haplotypes we're working with
|
* @param haplotypes the list of haplotypes we're working with
|
||||||
* @param samples the samples we're working with
|
|
||||||
* @param haplotypeReadMap map from samples -> the per read allele likelihoods
|
* @param haplotypeReadMap map from samples -> the per read allele likelihoods
|
||||||
* @param ref the reference bases (over the same interval as the haplotypes)
|
* @param ref the reference bases (over the same interval as the haplotypes)
|
||||||
* @param refLoc the span of the reference bases
|
* @param refLoc the span of the reference bases
|
||||||
|
|
@@ -232,7 +230,6 @@ public class GenotypingEngine {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
private TreeSet<Integer> decomposeHaplotypesIntoVariantContexts(final List<Haplotype> haplotypes,
|
private TreeSet<Integer> decomposeHaplotypesIntoVariantContexts(final List<Haplotype> haplotypes,
|
||||||
final List<String> samples,
|
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap,
|
final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap,
|
||||||
final byte[] ref,
|
final byte[] ref,
|
||||||
final GenomeLoc refLoc,
|
final GenomeLoc refLoc,
|
||||||
|
|
@@ -259,9 +256,9 @@ public class GenotypingEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
cleanUpSymbolicUnassembledEvents( haplotypes );
|
cleanUpSymbolicUnassembledEvents( haplotypes );
|
||||||
if ( !in_GGA_mode && samples.size() >= 10 ) {
|
if ( !in_GGA_mode && haplotypeReadMap.size() >= 10 ) {
|
||||||
// if not in GGA mode and have at least 10 samples try to create MNP and complex events by looking at LD structure
|
// if not in GGA mode and have at least 10 samples try to create MNP and complex events by looking at LD structure
|
||||||
mergeConsecutiveEventsBasedOnLD( haplotypes, samples, haplotypeReadMap, startPosKeySet, ref, refLoc );
|
mergeConsecutiveEventsBasedOnLD( haplotypes, haplotypeReadMap, startPosKeySet, ref, refLoc );
|
||||||
cleanUpSymbolicUnassembledEvents( haplotypes ); // the newly created merged events could be overlapping the unassembled events
|
cleanUpSymbolicUnassembledEvents( haplotypes ); // the newly created merged events could be overlapping the unassembled events
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@@ -282,7 +279,6 @@ public class GenotypingEngine {
|
||||||
private List<String> makePriorityList(final List<VariantContext> vcs) {
|
private List<String> makePriorityList(final List<VariantContext> vcs) {
|
||||||
final List<String> priorityList = new LinkedList<String>();
|
final List<String> priorityList = new LinkedList<String>();
|
||||||
for ( final VariantContext vc : vcs ) priorityList.add(vc.getSource());
|
for ( final VariantContext vc : vcs ) priorityList.add(vc.getSource());
|
||||||
|
|
||||||
return priorityList;
|
return priorityList;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@@ -333,17 +329,16 @@ public class GenotypingEngine {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* For a particular event described in inputVC, form PL vector for each sample by looking into allele read map and filling likelihood matrix for each allele
|
* For a particular event described in inputVC, form PL vector for each sample by looking into allele read map and filling likelihood matrix for each allele
|
||||||
* @param samples List of samples to genotype
|
|
||||||
* @param alleleReadMap Allele map describing mapping from reads to alleles and corresponding likelihoods
|
* @param alleleReadMap Allele map describing mapping from reads to alleles and corresponding likelihoods
|
||||||
* @param mergedVC Input VC with event to genotype
|
* @param mergedVC Input VC with event to genotype
|
||||||
* @return GenotypesContext object wrapping genotype objects with PLs
|
* @return GenotypesContext object wrapping genotype objects with PLs
|
||||||
*/
|
*/
|
||||||
@Requires({"samples != null","alleleReadMap!= null", "mergedVC != null"})
|
@Requires({"alleleReadMap!= null", "mergedVC != null"})
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
private GenotypesContext calculateGLsForThisEvent( final List<String> samples, final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap, final VariantContext mergedVC ) {
|
private GenotypesContext calculateGLsForThisEvent( final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap, final VariantContext mergedVC ) {
|
||||||
final GenotypesContext genotypes = GenotypesContext.create(samples.size());
|
final GenotypesContext genotypes = GenotypesContext.create(alleleReadMap.size());
|
||||||
// Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample
|
// Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample
|
||||||
for( final String sample : samples ) {
|
for( final String sample : alleleReadMap.keySet() ) {
|
||||||
final int numHaplotypes = mergedVC.getAlleles().size();
|
final int numHaplotypes = mergedVC.getAlleles().size();
|
||||||
final double[] genotypeLikelihoods = new double[numHaplotypes * (numHaplotypes+1) / 2];
|
final double[] genotypeLikelihoods = new double[numHaplotypes * (numHaplotypes+1) / 2];
|
||||||
final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, alleleReadMap, mergedVC.getAlleles());
|
final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, alleleReadMap, mergedVC.getAlleles());
|
||||||
|
|
@@ -448,14 +443,12 @@ public class GenotypingEngine {
|
||||||
/**
|
/**
|
||||||
* TODO - comment me, clean me, refactor me!
|
* TODO - comment me, clean me, refactor me!
|
||||||
* @param haplotypes
|
* @param haplotypes
|
||||||
* @param samples
|
|
||||||
* @param haplotypeReadMap
|
* @param haplotypeReadMap
|
||||||
* @param startPosKeySet
|
* @param startPosKeySet
|
||||||
* @param ref
|
* @param ref
|
||||||
* @param refLoc
|
* @param refLoc
|
||||||
*/
|
*/
|
||||||
protected void mergeConsecutiveEventsBasedOnLD( final List<Haplotype> haplotypes,
|
protected void mergeConsecutiveEventsBasedOnLD( final List<Haplotype> haplotypes,
|
||||||
final List<String> samples,
|
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap,
|
final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap,
|
||||||
final TreeSet<Integer> startPosKeySet,
|
final TreeSet<Integer> startPosKeySet,
|
||||||
final byte[] ref,
|
final byte[] ref,
|
||||||
|
|
@@ -465,6 +458,7 @@ public class GenotypingEngine {
|
||||||
final double MERGE_EVENTS_R2_THRESHOLD = 0.95;
|
final double MERGE_EVENTS_R2_THRESHOLD = 0.95;
|
||||||
if( startPosKeySet.size() <= 1 ) { return; }
|
if( startPosKeySet.size() <= 1 ) { return; }
|
||||||
|
|
||||||
|
final Set<String> samples = haplotypeReadMap.keySet();
|
||||||
boolean mapWasUpdated = true;
|
boolean mapWasUpdated = true;
|
||||||
while( mapWasUpdated ) {
|
while( mapWasUpdated ) {
|
||||||
mapWasUpdated = false;
|
mapWasUpdated = false;
|
||||||
|
|
|
||||||
|
|
@@ -559,7 +559,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
|
|
||||||
final GenotypingEngine.CalledHaplotypes calledHaplotypes = genotypingEngine.assignGenotypeLikelihoods( UG_engine,
|
final GenotypingEngine.CalledHaplotypes calledHaplotypes = genotypingEngine.assignGenotypeLikelihoods( UG_engine,
|
||||||
bestHaplotypes,
|
bestHaplotypes,
|
||||||
samplesList,
|
|
||||||
stratifiedReadMap,
|
stratifiedReadMap,
|
||||||
perSampleFilteredReadList,
|
perSampleFilteredReadList,
|
||||||
fullReferenceWithPadding,
|
fullReferenceWithPadding,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue