diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 0d6e29fe9..ee9993b4f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -794,7 +794,7 @@ public class GenotypingEngine { return vcs; } - private static class Event { + protected static class Event { public VariantContext vc; public Event( final VariantContext vc ) { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index c416938cd..292bea50d 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -46,11 +46,22 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broad.tribble.TribbleIndexedFeatureReader; import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.vcf.VCFCodec; import org.testng.annotations.Test; -import java.util.Arrays; -import java.util.Collections; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.*; public class HaplotypeCallerIntegrationTest extends WalkerTest { final static String REF = b37KGReference; @@ -88,6 +99,11 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { "70bd5d0805bf6f51e5f61b377526c979"); } + @Test + public void testHaplotypeCallerInsertionOnEdgeOfContig() { + HCTest(CEUTRIO_MT_TEST_BAM, "-dcov 90 -L MT:1-10", "7f1fb8f9587f64643f6612ef1dd6d4ae"); + } + private void HCTestIndelQualityScores(String bam, String args, String md5) { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10,005,000-10,025,000 --no_cmdline_in_header -o %s -minPruning 2"; final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); @@ -99,9 +115,41 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "4141b4c24a136a3fe4c0b0a4c231cdfa"); } + private void HCTestNearbySmallIntervals(String bam, String args, String md5) { + try { + final IndexedFastaSequenceFile fasta = new IndexedFastaSequenceFile(new File(b37KGReference)); + final GenomeLocParser parser = new GenomeLocParser(fasta.getSequenceDictionary()); + + final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10,001,603-10,001,642 -L 20:10,001,653-10,001,742 --no_cmdline_in_header -o %s"; + final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); + for( final File vcf : executeTest("testHaplotypeCallerNearbySmallIntervals: args=" + args, spec).getFirst() ) { + if( containsDuplicateRecord(vcf, parser) ) { + throw new IllegalStateException("Duplicate records detected but there should be none."); + } + } + } catch( FileNotFoundException e ) { + throw new IllegalStateException("Could not find the b37 reference file."); + } + } + + private boolean containsDuplicateRecord( final File vcf, final GenomeLocParser parser ) { + final List> VCs = new ArrayList>(); + try { + for( final VariantContext vc : GATKVCFUtils.readVCF(vcf).getSecond() ) { + VCs.add(new Pair(parser.createGenomeLoc(vc), new GenotypingEngine.Event(vc))); + } + } catch( IOException e ) { + throw new IllegalStateException("Somehow the temporary VCF from the integration test could not be read."); + } + + final Set> VCsAsSet = new HashSet>(VCs); + return VCsAsSet.size() != VCs.size(); // The set will remove duplicate Events. + } + + @Test - public void testHaplotypeCallerInsertionOnEdgeOfContig() { - HCTest(CEUTRIO_MT_TEST_BAM, "-dcov 90 -L MT:1-10", "7f1fb8f9587f64643f6612ef1dd6d4ae"); + public void testHaplotypeCallerNearbySmallIntervals() { + HCTestNearbySmallIntervals(NA12878_BAM, "", "b9d614efdaf38b87b459df421aab93a7"); } // This problem bam came from a user on the forum and it spotted a problem where the ReadClipper diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 64c6d5094..7b831db32 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -70,7 +70,7 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends TraversalEngine extends TraversalEngine stateList; @@ -98,7 +98,7 @@ public class ActivityProfile { */ @Ensures("result >= 0") public int getMaxProbPropagationDistance() { - return MAX_PROB_PROPOGATION_DISTANCE; + return MAX_PROB_PROPAGATION_DISTANCE; } /** @@ -210,7 +210,7 @@ public class ActivityProfile { contigLength = parser.getContigInfo(regionStartLoc.getContig()).getSequenceLength(); } else { if ( regionStopLoc.getStart() != loc.getStart() - 1 ) - throw new IllegalArgumentException("Bad add call to ActivityProfile: loc " + loc + " not immediate after last loc " + regionStopLoc ); + throw new IllegalArgumentException("Bad add call to ActivityProfile: loc " + loc + " not immediately after last loc " + regionStopLoc ); regionStopLoc = loc; } @@ -239,7 +239,7 @@ public class ActivityProfile { throw new IllegalArgumentException("Must add state contiguous to existing states: adding " + stateToAdd); if ( position >= 0 ) { - // ignore states starting before this regions start + // ignore states starting before this region's start if ( position < size() ) { stateList.get(position).isActiveProb += stateToAdd.isActiveProb; } else { @@ -352,6 +352,12 @@ public class ActivityProfile { if ( stateList.isEmpty() ) return null; + // If we are flushing the activity profile we need to trim off the excess states so that we don't create regions outside of our current processing interval + if( forceConversion ) { + final List statesToTrimAway = new ArrayList(stateList.subList(getSpan().size(), stateList.size())); + stateList.removeAll(statesToTrimAway); + } + final ActivityProfileState first = stateList.get(0); final boolean isActiveRegion = first.isActiveProb > ACTIVE_PROB_THRESHOLD; final int offsetOfNextRegionEnd = findEndOfRegion(isActiveRegion, minRegionSize, maxRegionSize, forceConversion);