diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 8e76b6ea6..59cadbdf9 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -119,9 +119,10 @@ public class GenotypingEngine { * Main entry point of class - given a particular set of haplotypes, samples and reference context, compute * genotype likelihoods and assemble into a list of variant contexts and genomic events ready for calling * + * The list of samples we're working with is obtained from the haplotypeReadMap + * * @param UG_engine UG Engine with basic input parameters * @param haplotypes Haplotypes to assign likelihoods to - * @param samples Samples to genotype * @param haplotypeReadMap Map from reads->(haplotypes,likelihoods) * @param perSampleFilteredReadList * @param ref Reference bytes at active region @@ -136,7 +137,6 @@ public class GenotypingEngine { // TODO - can this be refactored? this is hard to follow! public CalledHaplotypes assignGenotypeLikelihoods( final UnifiedGenotyperEngine UG_engine, final List haplotypes, - final List samples, final Map haplotypeReadMap, final Map> perSampleFilteredReadList, final byte[] ref, @@ -147,7 +147,6 @@ public class GenotypingEngine { // sanity check input arguments if (UG_engine == null) throw new IllegalArgumentException("UG_Engine input can't be null, got "+UG_engine); if (haplotypes == null || haplotypes.isEmpty()) throw new IllegalArgumentException("haplotypes input should be non-empty and non-null, got "+haplotypes); - if (samples == null || samples.isEmpty()) throw new IllegalArgumentException("samples input must be non-empty and non-null, got "+samples); if (haplotypeReadMap == null || haplotypeReadMap.isEmpty()) throw new IllegalArgumentException("haplotypeReadMap input should be non-empty and non-null, got "+haplotypeReadMap); if (ref == null || ref.length == 0 ) throw new IllegalArgumentException("ref bytes input should be non-empty and non-null, got "+ref); if (refLoc == null || refLoc.getStop()-refLoc.getStart()+1 != ref.length) throw new IllegalArgumentException(" refLoc must be non-null and length must match ref bytes, got "+refLoc); @@ -157,7 +156,7 @@ public class GenotypingEngine { // update the haplotypes so we're ready to call, getting the ordered list of positions on the reference // that carry events among the haplotypes - final TreeSet startPosKeySet = decomposeHaplotypesIntoVariantContexts(haplotypes, samples, haplotypeReadMap, ref, refLoc, activeAllelesToGenotype); + final TreeSet startPosKeySet = decomposeHaplotypesIntoVariantContexts(haplotypes, haplotypeReadMap, ref, refLoc, activeAllelesToGenotype); // Walk along each position in the key set and create each event to be outputted final Set calledHaplotypes = new HashSet(); @@ -195,7 +194,7 @@ public class GenotypingEngine { final Map alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().CONTAMINATION_FRACTION, UG_engine.getUAC().contaminationLog ); - final GenotypesContext genotypes = calculateGLsForThisEvent( samples, alleleReadMap, mergedVC ); + final GenotypesContext genotypes = calculateGLsForThisEvent( alleleReadMap, mergedVC ); final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), mergedVC.isSNP() ? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL); if( call != null ) { final Map alleleReadMap_annotations = ( USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap : @@ -224,7 +223,6 @@ public class GenotypingEngine { * Go through the haplotypes we assembled, and decompose them into their constituent variant contexts * * @param haplotypes the list of haplotypes we're working with - * @param samples the samples we're working with * @param haplotypeReadMap map from samples -> the per read allele likelihoods * @param ref the reference bases (over the same interval as the haplotypes) * @param refLoc the span of the reference bases @@ -232,7 +230,6 @@ public class GenotypingEngine { * @return */ private TreeSet decomposeHaplotypesIntoVariantContexts(final List haplotypes, - final List samples, final Map haplotypeReadMap, final byte[] ref, final GenomeLoc refLoc, @@ -259,9 +256,9 @@ public class GenotypingEngine { } cleanUpSymbolicUnassembledEvents( haplotypes ); - if ( !in_GGA_mode && samples.size() >= 10 ) { + if ( !in_GGA_mode && haplotypeReadMap.size() >= 10 ) { // if not in GGA mode and have at least 10 samples try to create MNP and complex events by looking at LD structure - mergeConsecutiveEventsBasedOnLD( haplotypes, samples, haplotypeReadMap, startPosKeySet, ref, refLoc ); + mergeConsecutiveEventsBasedOnLD( haplotypes, haplotypeReadMap, startPosKeySet, ref, refLoc ); cleanUpSymbolicUnassembledEvents( haplotypes ); // the newly created merged events could be overlapping the unassembled events } @@ -282,7 +279,6 @@ public class GenotypingEngine { private List makePriorityList(final List vcs) { final List priorityList = new LinkedList(); for ( final VariantContext vc : vcs ) priorityList.add(vc.getSource()); - return priorityList; } @@ -333,17 +329,16 @@ public class GenotypingEngine { /** * For a particular event described in inputVC, form PL vector for each sample by looking into allele read map and filling likelihood matrix for each allele - * @param samples List of samples to genotype * @param alleleReadMap Allele map describing mapping from reads to alleles and corresponding likelihoods * @param mergedVC Input VC with event to genotype * @return GenotypesContext object wrapping genotype objects with PLs */ - @Requires({"samples != null","alleleReadMap!= null", "mergedVC != null"}) + @Requires({"alleleReadMap!= null", "mergedVC != null"}) @Ensures("result != null") - private GenotypesContext calculateGLsForThisEvent( final List samples, final Map alleleReadMap, final VariantContext mergedVC ) { - final GenotypesContext genotypes = GenotypesContext.create(samples.size()); + private GenotypesContext calculateGLsForThisEvent( final Map alleleReadMap, final VariantContext mergedVC ) { + final GenotypesContext genotypes = GenotypesContext.create(alleleReadMap.size()); // Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample - for( final String sample : samples ) { + for( final String sample : alleleReadMap.keySet() ) { final int numHaplotypes = mergedVC.getAlleles().size(); final double[] genotypeLikelihoods = new double[numHaplotypes * (numHaplotypes+1) / 2]; final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, alleleReadMap, mergedVC.getAlleles()); @@ -448,14 +443,12 @@ public class GenotypingEngine { /** * TODO - comment me, clean me, refactor me! * @param haplotypes - * @param samples * @param haplotypeReadMap * @param startPosKeySet * @param ref * @param refLoc */ protected void mergeConsecutiveEventsBasedOnLD( final List haplotypes, - final List samples, final Map haplotypeReadMap, final TreeSet startPosKeySet, final byte[] ref, @@ -465,6 +458,7 @@ public class GenotypingEngine { final double MERGE_EVENTS_R2_THRESHOLD = 0.95; if( startPosKeySet.size() <= 1 ) { return; } + final Set samples = haplotypeReadMap.keySet(); boolean mapWasUpdated = true; while( mapWasUpdated ) { mapWasUpdated = false; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index d77caa2a2..a6b19826b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -559,7 +559,6 @@ public class HaplotypeCaller extends ActiveRegionWalker implem final GenotypingEngine.CalledHaplotypes calledHaplotypes = genotypingEngine.assignGenotypeLikelihoods( UG_engine, bestHaplotypes, - samplesList, stratifiedReadMap, perSampleFilteredReadList, fullReferenceWithPadding,