diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 8b789791d..a2920a432 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -79,7 +79,25 @@ public class GenotypingEngine { noCall.add(Allele.NO_CALL); } + /** + * Main entry point of class - given a particular set of haplotypes, samples and reference context, compute + * genotype likelihoods and assemble into a list of variant contexts and genomic events ready for calling + * + * @param UG_engine UG Engine with basic input parameters + * @param haplotypes Haplotypes to assign likelihoods to + * @param samples Samples to genotype + * @param haplotypeReadMap Map from reads->(haplotypes,likelihoods) + * @param perSampleFilteredReadList + * @param ref Reference bytes at active region + * @param refLoc Corresponding active region genome location + * @param activeRegionWindow Active window + * @param genomeLocParser GenomeLocParser + * @param activeAllelesToGenotype Alleles to genotype + * @return List of VC's with genotyped events + */ @Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"}) + @Ensures("result != null") + // TODO - can this be refactored? this is hard to follow! 
public List assignGenotypeLikelihoods( final UnifiedGenotyperEngine UG_engine, final List haplotypes, final List samples, @@ -90,6 +108,25 @@ public class GenotypingEngine { final GenomeLoc activeRegionWindow, final GenomeLocParser genomeLocParser, final List activeAllelesToGenotype ) { + // sanity check input arguments + if (UG_engine == null) + throw new IllegalArgumentException("UG_Engine input can't be null, got "+UG_engine); + if (haplotypes == null || haplotypes.isEmpty()) + throw new IllegalArgumentException("haplotypes input should be non-empty and non-null, got "+haplotypes); + if (samples == null || samples.isEmpty()) + throw new IllegalArgumentException("samples input must be non-empty and non-null, got "+samples); + if (haplotypeReadMap == null || haplotypeReadMap.isEmpty()) + throw new IllegalArgumentException("haplotypeReadMap input should be non-empty and non-null, got "+haplotypeReadMap); + if (ref == null || ref.length == 0 ) + throw new IllegalArgumentException("ref bytes input should be non-empty and non-null, got "+ref); + if (refLoc == null || refLoc.getStop()-refLoc.getStart()+1 != ref.length) + throw new IllegalArgumentException(" refLoc must be non-null and length must match ref bytes, got "+refLoc); + if (activeRegionWindow == null ) + throw new IllegalArgumentException("activeRegionWindow must be non-null, got "+activeRegionWindow); + if (activeAllelesToGenotype == null ) + throw new IllegalArgumentException("activeAllelesToGenotype must be non-null, got "+activeAllelesToGenotype); + if (genomeLocParser == null ) + throw new IllegalArgumentException("genomeLocParser must be non-null, got "+genomeLocParser); final List returnCalls = new ArrayList(); final boolean in_GGA_mode = !activeAllelesToGenotype.isEmpty(); @@ -180,7 +217,7 @@ public class GenotypingEngine { if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) { throw new ReviewedStingException("Record size mismatch! 
Something went wrong in the merging of alleles."); } - final Map mergeMap = new HashMap(); + final Map mergeMap = new LinkedHashMap(); mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele for(int iii = 0; iii < mergedVC.getAlternateAlleles().size(); iii++) { mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function @@ -214,6 +251,15 @@ public class GenotypingEngine { return returnCalls; } + /** + * For a particular event described in inputVC, form PL vector for each sample by looking into allele read map and filling likelihood matrix for each allele + * @param samples List of samples to genotype + * @param alleleReadMap Allele map describing mapping from reads to alleles and corresponding likelihoods + * @param mergedVC Input VC with event to genotype + * @return GenotypesContext object wrapping genotype objects with PLs + */ + @Requires({"samples != null","alleleReadMap!= null", "mergedVC != null"}) + @Ensures("result != null") private GenotypesContext calculateGLsForThisEvent( final List samples, final Map alleleReadMap, final VariantContext mergedVC ) { final GenotypesContext genotypes = GenotypesContext.create(samples.size()); // Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample @@ -254,7 +300,7 @@ public class GenotypingEngine { final Map> perSampleFilteredReadList, final VariantContext call ) { - final Map returnMap = new HashMap(); + final Map returnMap = new LinkedHashMap(); final GenomeLoc callLoc = parser.createGenomeLoc(call); for( final Map.Entry sample : perSampleReadMap.entrySet() ) { final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap(); @@ -283,6 +329,12 @@ public class GenotypingEngine { return returnMap; } + /** + * Removes symbolic events from list of 
haplotypes + * @param haplotypes Input/output list of haplotypes, before/after removal + */ + // TODO - split into input haplotypes and output haplotypes as not to share I/O arguments + @Requires("haplotypes != null") protected static void cleanUpSymbolicUnassembledEvents( final List haplotypes ) { final List haplotypesToRemove = new ArrayList(); for( final Haplotype h : haplotypes ) { @@ -308,7 +360,7 @@ public class GenotypingEngine { final double downsamplingFraction, final PrintStream downsamplingLog ) { - final Map alleleReadMap = new HashMap(); + final Map alleleReadMap = new LinkedHashMap(); for( final Map.Entry haplotypeReadMapEntry : haplotypeReadMap.entrySet() ) { // for each sample final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap(); for( final Map.Entry> alleleMapperEntry : alleleMapper.entrySet() ) { // for each output allele @@ -330,6 +382,15 @@ public class GenotypingEngine { return alleleReadMap; } + /** + * TODO - comment me, clean me, refactor me! 
+ * @param haplotypes + * @param samples + * @param haplotypeReadMap + * @param startPosKeySet + * @param ref + * @param refLoc + */ protected void mergeConsecutiveEventsBasedOnLD( final List haplotypes, final List samples, final Map haplotypeReadMap, @@ -474,7 +535,7 @@ public class GenotypingEngine { } protected static Map> createAlleleMapper( final Map mergeMap, final Map> eventMap ) { - final Map> alleleMapper = new HashMap>(); + final Map> alleleMapper = new LinkedHashMap>(); for( final Map.Entry entry : mergeMap.entrySet() ) { alleleMapper.put(entry.getValue(), eventMap.get(new Event(entry.getKey()))); } @@ -485,7 +546,7 @@ public class GenotypingEngine { @Ensures({"result.size() == eventsAtThisLoc.size() + 1"}) protected static Map> createEventMapper( final int loc, final List eventsAtThisLoc, final List haplotypes ) { - final Map> eventMapper = new HashMap>(eventsAtThisLoc.size()+1); + final Map> eventMapper = new LinkedHashMap>(eventsAtThisLoc.size()+1); VariantContext refVC = eventsAtThisLoc.get(0); // the genome loc is the only safe thing to pull out of this VC because ref/alt pairs might change reference basis eventMapper.put(new Event(null), new ArrayList()); for( final VariantContext vc : eventsAtThisLoc ) { @@ -598,7 +659,7 @@ public class GenotypingEngine { } protected static Map generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd ) { - final Map vcs = new HashMap(); + final Map vcs = new LinkedHashMap(); int refPos = alignmentStartHapwrtRef; if( refPos < 0 ) { return null; } // Protection against SW failures diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 99af48111..74e28db63 100644 --- 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -68,7 +68,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "042b76d4ba0c8f76e2e9cadd1c20d90d"); + HCTest(CEUTRIO_BAM, "", "1e49fd927d79594a993ea6c4a1d10004"); } @Test @@ -99,7 +99,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { "76d4c4a112cf60080adf74c3e116d1fb"); } - @Test(enabled = false) // TODO -- https://jira.broadinstitute.org/browse/GSA-722 + @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", "23a4bfa0300683d8cf2ec16ce96e89ad"); @@ -146,7 +146,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestProblematicReadsModifiedInActiveRegions() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("866406b43d22a262b2d852e7252eb430")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("598d245498c0d0b55e263f0a061a77e3")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } diff --git a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java index 728a13aa8..cc4fc6129 100644 --- a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java +++ b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java @@ -64,15 +64,13 @@ public class PerReadAlleleLikelihoodMap { if 
( a == null ) throw new IllegalArgumentException("Cannot add a null allele to the allele likelihood map"); if ( likelihood == null ) throw new IllegalArgumentException("Likelihood cannot be null"); if ( likelihood > 0.0 ) throw new IllegalArgumentException("Likelihood must be negative (L = log(p))"); - Map likelihoodMap; - if (likelihoodReadMap.containsKey(read)){ - // seen pileup element before - likelihoodMap = likelihoodReadMap.get(read); - } - else { - likelihoodMap = new HashMap(); - likelihoodReadMap.put(read,likelihoodMap); + Map likelihoodMap = likelihoodReadMap.get(read); + if (likelihoodMap == null){ + // LinkedHashMap will ensure iterating through alleles will be in consistent order + likelihoodMap = new LinkedHashMap(); } + likelihoodReadMap.put(read,likelihoodMap); + likelihoodMap.put(a,likelihood); if (!alleles.contains(a)) @@ -221,4 +219,27 @@ public class PerReadAlleleLikelihoodMap { } return (maxLike - prevMaxLike > INFORMATIVE_LIKELIHOOD_THRESHOLD ? mostLikelyAllele : Allele.NO_CALL ); } + + + /** + * Debug method to dump contents of object into string for display + */ + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + + sb.append("Alleles in map:"); + for (Allele a:alleles) { + sb.append(a.getDisplayString()+","); + + } + sb.append("\n"); + for (Map.Entry > el : getLikelihoodReadMap().entrySet() ) { + for (Map.Entry eli : el.getValue().entrySet()) { + sb.append("Read "+el.getKey().getReadName()+". Allele:"+eli.getKey().getDisplayString()+" has likelihood="+Double.toString(eli.getValue())+"\n"); + } + + } + return sb.toString(); + } }