diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 9de9b3292..192befe67 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -52,7 +52,11 @@ public class GenotypingEngine { noCall.add(Allele.NO_CALL); } - // This function is the streamlined approach, currently not being used + // WARN + // This function is the streamlined approach, currently not being used by default + // WARN + // WARN: This function is currently only being used by Menachem. Slated for removal/merging with the rest of the code. + // WARN @Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"}) public List>>> assignGenotypeLikelihoodsAndCallHaplotypeEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList haplotypes, @@ -184,6 +188,7 @@ public class GenotypingEngine { return returnCalls; } + // BUGBUG: Create a class to hold this complicated return type @Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"}) public List>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine, final ArrayList haplotypes, @@ -210,13 +215,8 @@ public class GenotypingEngine { System.out.println( ">> Events = " + h.getEventMap()); } } - // Create the VC merge priority list - final ArrayList priorityList = new ArrayList(); - for( int iii = 0; iii < haplotypes.size(); iii++ ) { - priorityList.add("HC" + iii); - } - cleanUpSymbolicUnassembledEvents( haplotypes, priorityList ); + cleanUpSymbolicUnassembledEvents( haplotypes ); if( activeAllelesToGenotype.isEmpty() && haplotypes.get(0).getSampleKeySet().size() >= 3 ) { // if not in GGA mode and have at least 3 samples try to create MNP and complex events by looking at LD 
structure mergeConsecutiveEventsBasedOnLD( haplotypes, startPosKeySet, ref, refLoc ); } @@ -229,13 +229,16 @@ public class GenotypingEngine { // Walk along each position in the key set and create each event to be outputted for( final int loc : startPosKeySet ) { if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { - final ArrayList eventsAtThisLoc = new ArrayList(); + final ArrayList eventsAtThisLoc = new ArrayList(); // the overlapping events to merge into a common reference view + final ArrayList priorityList = new ArrayList(); // used to merge overlapping events into common reference view + if( activeAllelesToGenotype.isEmpty() ) { for( final Haplotype h : haplotypes ) { final HashMap eventMap = h.getEventMap(); final VariantContext vc = eventMap.get(loc); if( vc != null && !containsVCWithMatchingAlleles(eventsAtThisLoc, vc) ) { eventsAtThisLoc.add(vc); + priorityList.add(vc.getSource()); } } } else { // we are in GGA mode! @@ -260,6 +263,22 @@ public class GenotypingEngine { // Create the allele mapping object which maps the original haplotype alleles to the alleles present in just this event final ArrayList> alleleMapper = createAlleleMapper( loc, eventsAtThisLoc, haplotypes ); + // Sanity check the priority list + for( final VariantContext vc : eventsAtThisLoc ) { + if( !priorityList.contains(vc.getSource()) ) { + throw new ReviewedStingException("Event found on haplotype that wasn't added to priority list. Something went wrong in the merging of alleles."); + } + } + for( final String name : priorityList ) { + boolean found = false; + for( final VariantContext vc : eventsAtThisLoc ) { + if(vc.getSource().equals(name)) { found = true; break; } + } + if( !found ) { + throw new ReviewedStingException("Event added to priority list but wasn't found on any haplotype. 
Something went wrong in the merging of alleles."); + } + } + // Merge the event to find a common reference representation final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false); if( mergedVC == null ) { continue; } @@ -299,9 +318,8 @@ public class GenotypingEngine { return returnCalls; } - protected static void cleanUpSymbolicUnassembledEvents( final ArrayList haplotypes, final ArrayList priorityList ) { + protected static void cleanUpSymbolicUnassembledEvents( final ArrayList haplotypes ) { final ArrayList haplotypesToRemove = new ArrayList(); - final ArrayList stringsToRemove = new ArrayList(); for( final Haplotype h : haplotypes ) { for( final VariantContext vc : h.getEventMap().values() ) { if( vc.isSymbolic() ) { @@ -309,7 +327,6 @@ public class GenotypingEngine { for( final VariantContext vc2 : h2.getEventMap().values() ) { if( vc.getStart() == vc2.getStart() && vc2.isIndel() ) { haplotypesToRemove.add(h); - stringsToRemove.add(vc.getSource()); break; } } @@ -318,7 +335,6 @@ public class GenotypingEngine { } } haplotypes.removeAll(haplotypesToRemove); - priorityList.removeAll(stringsToRemove); } protected void mergeConsecutiveEventsBasedOnLD( final ArrayList haplotypes, final TreeSet startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java index dc727fa48..577b1cfdc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java @@ -16,7 +16,7 @@ import java.util.*; /** * The u-based z-approximation from the 
Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele). - * Note that the base quality rank sum test can not be calculated for homozygous sites. + * Note that the base quality rank sum test cannot be calculated for sites without a mixture of reads showing both the reference and alternate alleles. */ public class BaseQualityRankSumTest extends RankSumTest implements StandardAnnotation { public List getKeyNames() { return Arrays.asList("BaseQRankSum"); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java index 1fd220f2f..c74f98ca3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java @@ -16,6 +16,10 @@ import java.util.*; * Date: 6/28/12 */ +/** + * The u-based z-approximation from the Mann-Whitney Rank Sum Test for reads with clipped bases (reads with ref bases vs. those with the alternate allele). + * Note that the clipping rank sum test cannot be calculated for sites without a mixture of reads showing both the reference and alternate alleles. 
+ */ public class ClippingRankSumTest extends RankSumTest { public List getKeyNames() { return Arrays.asList("ClippingRankSum"); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java index 6557f3e47..787c9b29b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java @@ -17,7 +17,7 @@ import java.util.*; /** * The u-based z-approximation from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele) - * Note that the mapping quality rank sum test can not be calculated for homozygous sites. + * Note that the mapping quality rank sum test cannot be calculated for sites without a mixture of reads showing both the reference and alternate alleles. */ public class MappingQualityRankSumTest extends RankSumTest implements StandardAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java index 1ac8ee113..de0ce2ce2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java @@ -20,7 +20,7 @@ import java.util.*; /** * The u-based z-approximation from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele; if the alternate allele is only seen near the ends of reads this is indicative of error). - * Note that the read position rank sum test can not be calculated for homozygous sites. 
+ * Note that the read position rank sum test cannot be calculated for sites without a mixture of reads showing both the reference and alternate alleles. */ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotation {