diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 9eee9b7e4..3078122fb 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -888,7 +888,8 @@ public class GenomeAnalysisEngine { argCollection.defaultBaseQualities, removeProgramRecords, keepReadsInLIBS, - sampleRenameMap); + sampleRenameMap, + argCollection.intervalArguments.intervalMerging); } /** diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java index 2f03edb68..d925e0d0d 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.sam.ReadUtils; import java.util.*; @@ -51,6 +52,7 @@ public class BAMScheduler implements Iterator { private GenomeLocSortedSet loci; private PeekableIterator locusIterator; private GenomeLoc currentLocus; + private IntervalMergingRule intervalMergingRule; /* * Creates BAMScheduler using contigs from the given BAM data source. 
@@ -59,27 +61,28 @@ public class BAMScheduler implements Iterator { * @return non-null BAM scheduler */ public static BAMScheduler createOverMappedReads(final SAMDataSource dataSource) { - final BAMScheduler scheduler = new BAMScheduler(dataSource); + final BAMScheduler scheduler = new BAMScheduler(dataSource, IntervalMergingRule.ALL); final GenomeLocSortedSet intervals = GenomeLocSortedSet.createSetFromSequenceDictionary(dataSource.getHeader().getSequenceDictionary()); scheduler.populateFilteredIntervalList(intervals); return scheduler; } public static BAMScheduler createOverAllReads(final SAMDataSource dataSource, final GenomeLocParser parser) { - BAMScheduler scheduler = new BAMScheduler(dataSource); + BAMScheduler scheduler = new BAMScheduler(dataSource, IntervalMergingRule.ALL); scheduler.populateUnfilteredIntervalList(parser); return scheduler; } - public static BAMScheduler createOverIntervals(final SAMDataSource dataSource, final GenomeLocSortedSet loci) { - BAMScheduler scheduler = new BAMScheduler(dataSource); + public static BAMScheduler createOverIntervals(final SAMDataSource dataSource, final IntervalMergingRule mergeRule, final GenomeLocSortedSet loci) { + BAMScheduler scheduler = new BAMScheduler(dataSource, mergeRule); scheduler.populateFilteredIntervalList(loci); return scheduler; } - private BAMScheduler(final SAMDataSource dataSource) { + private BAMScheduler(final SAMDataSource dataSource, final IntervalMergingRule mergeRule) { this.dataSource = dataSource; + this.intervalMergingRule = mergeRule; for(SAMReaderID reader: dataSource.getReaderIDs()) { GATKBAMIndex index = dataSource.getIndex(reader); if(index != null) @@ -124,7 +127,7 @@ public class BAMScheduler implements Iterator { * @return A file pointer over the specified region. 
*/ private FilePointer generatePointerOverEntireFileset() { - FilePointer filePointer = new FilePointer(); + FilePointer filePointer = new FilePointer(intervalMergingRule); // This is a "monolithic" FilePointer representing all regions in all files we will ever visit, and is // the only FilePointer we will create. This allows us to have this FilePointer represent regions from @@ -165,14 +168,14 @@ public class BAMScheduler implements Iterator { while(nextFilePointer == null && currentLocus != null) { // special case handling of the unmapped shard. if(currentLocus == GenomeLoc.UNMAPPED) { - nextFilePointer = new FilePointer(GenomeLoc.UNMAPPED); + nextFilePointer = new FilePointer(intervalMergingRule, GenomeLoc.UNMAPPED); for(SAMReaderID id: dataSource.getReaderIDs()) nextFilePointer.addFileSpans(id,createSpanToEndOfFile(indexFiles.get(id).getStartOfLastLinearBin())); currentLocus = null; continue; } - nextFilePointer = new FilePointer(); + nextFilePointer = new FilePointer(intervalMergingRule); int coveredRegionStart = 1; int coveredRegionStop = Integer.MAX_VALUE; diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java index 517903da3..a66ab7130 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java @@ -45,6 +45,7 @@ import java.util.*; public class FilePointer { protected final SortedMap fileSpans = new TreeMap(); protected final List locations = new ArrayList(); + protected final IntervalMergingRule intervalMergingRule; /** * Does this file pointer point into an unmapped region? 
@@ -65,7 +66,8 @@ public class FilePointer { private Integer contigIndex = null; - public FilePointer( List locations ) { + public FilePointer( final IntervalMergingRule mergeRule, final List locations ) { + this.intervalMergingRule = mergeRule; this.locations.addAll(locations); this.isRegionUnmapped = checkUnmappedStatus(); @@ -75,12 +77,12 @@ public class FilePointer { } } - public FilePointer( final GenomeLoc... locations ) { - this(Arrays.asList(locations)); + public FilePointer( final IntervalMergingRule mergeRule, final GenomeLoc... locations ) { + this(mergeRule, Arrays.asList(locations)); } - public FilePointer( Map fileSpans, List locations ) { - this(locations); + public FilePointer( final Map fileSpans, final IntervalMergingRule mergeRule, final List locations ) { + this(mergeRule, locations); this.fileSpans.putAll(fileSpans); } @@ -152,6 +154,15 @@ public class FilePointer { return locations.size() > 0 ? locations.get(0).getContigIndex() : SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX; } + /** + * Returns the IntervalMergingRule used by this FilePointer to merge adjacent locations + * + * @return the IntervalMergingRule used by this FilePointer (never null) + */ + public IntervalMergingRule getIntervalMergingRule() { + return intervalMergingRule; + } + /** * Is this FilePointer "monolithic"? That is, does it represent all regions in all files that we will * ever visit during this GATK run? If this is set to true, the engine will expect to see only this @@ -277,12 +288,12 @@ public class FilePointer { * @return A completely new file pointer that is the combination of the two. 
*/ public FilePointer combine(final GenomeLocParser parser, final FilePointer other) { - FilePointer combined = new FilePointer(); + FilePointer combined = new FilePointer(intervalMergingRule); List intervals = new ArrayList(); intervals.addAll(locations); intervals.addAll(other.locations); - for(GenomeLoc interval: IntervalUtils.sortAndMergeIntervals(parser,intervals,IntervalMergingRule.ALL)) + for(GenomeLoc interval: IntervalUtils.sortAndMergeIntervals(parser,intervals,intervalMergingRule)) combined.addLocation(interval); PeekableIterator> thisIterator = new PeekableIterator>(this.fileSpans.entrySet().iterator()); @@ -340,15 +351,18 @@ public class FilePointer { */ public static FilePointer union( List filePointers, GenomeLocParser parser ) { if ( filePointers == null || filePointers.isEmpty() ) { - return new FilePointer(); + return new FilePointer(IntervalMergingRule.ALL); } Map> fileChunks = new HashMap>(); List locations = new ArrayList(); + IntervalMergingRule mergeRule = filePointers.get(0).getIntervalMergingRule(); // First extract all intervals and file chunks from the FilePointers into unsorted, unmerged collections for ( FilePointer filePointer : filePointers ) { locations.addAll(filePointer.getLocations()); + if (mergeRule != filePointer.getIntervalMergingRule()) + throw new ReviewedStingException("All FilePointers in FilePointer.union() must use the same IntervalMergingRule"); for ( Map.Entry fileSpanEntry : filePointer.getFileSpans().entrySet() ) { GATKBAMFileSpan fileSpan = (GATKBAMFileSpan)fileSpanEntry.getValue(); @@ -364,7 +378,7 @@ public class FilePointer { // Now sort and merge the intervals List sortedMergedLocations = new ArrayList(); - sortedMergedLocations.addAll(IntervalUtils.sortAndMergeIntervals(parser, locations, IntervalMergingRule.ALL)); + sortedMergedLocations.addAll(IntervalUtils.sortAndMergeIntervals(parser, locations, mergeRule)); // For each BAM file, convert from an unsorted, unmerged list of chunks to a GATKBAMFileSpan 
containing // the sorted, merged union of the chunks for that file @@ -375,7 +389,7 @@ public class FilePointer { (new GATKBAMFileSpan(unmergedChunks.toArray(new GATKChunk[unmergedChunks.size()]))).union(new GATKBAMFileSpan())); } - return new FilePointer(mergedFileSpans, sortedMergedLocations); + return new FilePointer(mergedFileSpans, mergeRule, sortedMergedLocations); } /** diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java index b476945ce..1e13e6bc5 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java @@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.datasources.reads; import net.sf.picard.util.PeekableIterator; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import java.util.Iterator; @@ -54,8 +55,8 @@ public class IntervalSharder implements Iterator { return new IntervalSharder(BAMScheduler.createOverMappedReads(dataSource),parser); } - public static IntervalSharder shardOverIntervals(final SAMDataSource dataSource, final GenomeLocSortedSet loci) { - return new IntervalSharder(BAMScheduler.createOverIntervals(dataSource,loci),loci.getGenomeLocParser()); + public static IntervalSharder shardOverIntervals(final SAMDataSource dataSource, final GenomeLocSortedSet loci, final IntervalMergingRule intervalMergeRule) { + return new IntervalSharder(BAMScheduler.createOverIntervals(dataSource,intervalMergeRule,loci),loci.getGenomeLocParser()); } private IntervalSharder(final BAMScheduler scheduler, final GenomeLocParser parser) { diff --git 
a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index e8a2455e4..88998f4d8 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -45,6 +45,7 @@ import org.broadinstitute.sting.utils.SimpleTimer; import org.broadinstitute.sting.utils.baq.ReadTransformingIterator; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory; @@ -154,6 +155,11 @@ public class SAMDataSource { */ private final ThreadAllocation threadAllocation; + /** + * How are adjacent intervals merged by the sharder? + */ + private final IntervalMergingRule intervalMergingRule; + /** * Static set of unsupported programs that create bam files. * The key is the PG record ID and the value is the name of the tool that created it @@ -217,7 +223,8 @@ public class SAMDataSource { (byte) -1, false, false, - null); + null, + IntervalMergingRule.ALL); } /** @@ -236,6 +243,7 @@ public class SAMDataSource { * @param keepReadsInLIBS should we keep a unique list of reads in LIBS? * @param sampleRenameMap Map of BAM file to new sample ID used during on-the-fly runtime sample renaming. * Will be null if we're not doing sample renaming. 
+ * @param intervalMergingRule how are adjacent intervals merged by the sharder */ public SAMDataSource( Collection samFiles, @@ -253,10 +261,12 @@ public class SAMDataSource { byte defaultBaseQualities, boolean removeProgramRecords, final boolean keepReadsInLIBS, - final Map sampleRenameMap) { + final Map sampleRenameMap, + final IntervalMergingRule intervalMergingRule) { this.readMetrics = new ReadMetrics(); this.genomeLocParser = genomeLocParser; + this.intervalMergingRule = intervalMergingRule; readerIDs = samFiles; @@ -1182,7 +1192,7 @@ public class SAMDataSource { public Iterable createShardIteratorOverIntervals(final GenomeLocSortedSet intervals,final ShardBalancer shardBalancer) { if(intervals == null) throw new ReviewedStingException("Unable to create schedule from intervals; no intervals were provided."); - shardBalancer.initialize(this,IntervalSharder.shardOverIntervals(SAMDataSource.this,intervals),genomeLocParser); + shardBalancer.initialize(this,IntervalSharder.shardOverIntervals(SAMDataSource.this,intervals,intervalMergingRule),genomeLocParser); return shardBalancer; } } diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/utilities/FindLargeShards.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/utilities/FindLargeShards.java index 66463e576..1587b10a4 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/utilities/FindLargeShards.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/gatk/datasources/reads/utilities/FindLargeShards.java @@ -104,7 +104,7 @@ public class FindLargeShards extends CommandLineProgram { logger.info(String.format("PROGRESS: Calculating mean and variance: Contig\tRegion.Start\tRegion.Stop\tSize")); - IntervalSharder sharder = IntervalSharder.shardOverIntervals(dataSource,intervalSortedSet); + IntervalSharder sharder = 
IntervalSharder.shardOverIntervals(dataSource,intervalSortedSet,IntervalMergingRule.ALL); while(sharder.hasNext()) { FilePointer filePointer = sharder.next(); @@ -133,7 +133,7 @@ public class FindLargeShards extends CommandLineProgram { logger.warn(String.format("PROGRESS: Searching for large shards: Contig\tRegion.Start\tRegion.Stop\tSize")); out.printf("Contig\tRegion.Start\tRegion.Stop\tSize%n"); - sharder = IntervalSharder.shardOverIntervals(dataSource,intervalSortedSet); + sharder = IntervalSharder.shardOverIntervals(dataSource,intervalSortedSet,IntervalMergingRule.ALL); while(sharder.hasNext()) { FilePointer filePointer = sharder.next(); diff --git a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java index 02d0c66b9..4d007c329 100644 --- a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java +++ b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java @@ -50,6 +50,7 @@ import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.sam.*; import org.testng.Assert; import org.testng.annotations.BeforeClass; @@ -158,7 +159,7 @@ public class ReadMetricsUnitTest extends BaseTest { new ValidationExclusion(), new ArrayList(), new ArrayList(), - false, (byte)30, false, true, null); + false, (byte)30, false, true, null, IntervalMergingRule.ALL); engine.setReadsDataSource(dataSource); @@ -193,7 +194,7 @@ public class ReadMetricsUnitTest extends BaseTest { new ValidationExclusion(), new ArrayList(), new ArrayList(), - false, (byte)30, false, true, null); + false, 
(byte)30, false, true, null, IntervalMergingRule.ALL); engine.setReadsDataSource(dataSource); final Set samples = SampleUtils.getSAMFileSamples(dataSource.getHeader()); @@ -234,7 +235,7 @@ public class ReadMetricsUnitTest extends BaseTest { new ValidationExclusion(), new ArrayList(), new ArrayList(), - false, (byte)30, false, true, null); + false, (byte)30, false, true, null, IntervalMergingRule.ALL); engine.setReadsDataSource(dataSource); final Set samples = SampleUtils.getSAMFileSamples(dataSource.getHeader()); @@ -281,7 +282,7 @@ public class ReadMetricsUnitTest extends BaseTest { new ValidationExclusion(), filters, new ArrayList(), - false, (byte)30, false, true, null); + false, (byte)30, false, true, null, IntervalMergingRule.ALL); engine.setReadsDataSource(dataSource); diff --git a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShardBalancerUnitTest.java b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShardBalancerUnitTest.java index e768faba4..97a426275 100644 --- a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShardBalancerUnitTest.java +++ b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/reads/ActiveRegionShardBalancerUnitTest.java @@ -31,6 +31,7 @@ import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.testng.Assert; import org.testng.annotations.BeforeClass; @@ -82,7 +83,7 @@ public class ActiveRegionShardBalancerUnitTest extends BaseTest { end = myEnd; final GenomeLoc loc = genomeLocParser.createGenomeLoc(record.getSequenceName(), i, myEnd); final Map fileSpans = Collections.emptyMap(); - final FilePointer 
fp = new FilePointer(fileSpans, Collections.singletonList(loc)); + final FilePointer fp = new FilePointer(fileSpans, IntervalMergingRule.ALL, Collections.singletonList(loc)); pointers.add(fp); } expectedLocs.add(Collections.singleton(genomeLocParser.createGenomeLoc(record.getSequenceName(), 0, end))); diff --git a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/reads/FilePointerUnitTest.java b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/reads/FilePointerUnitTest.java index 36fe20940..40229cd2d 100644 --- a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/reads/FilePointerUnitTest.java +++ b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/reads/FilePointerUnitTest.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.testng.Assert; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -61,12 +62,26 @@ public class FilePointerUnitTest extends BaseTest { @Test public void testFilePointerCombineDisjoint() { - FilePointer one = new FilePointer(genomeLocParser.createGenomeLoc("chr1",1,5)); + FilePointer one = new FilePointer(IntervalMergingRule.ALL, genomeLocParser.createGenomeLoc("chr1",1,5)); one.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,1))); - FilePointer two = new FilePointer(genomeLocParser.createGenomeLoc("chr1",6,10)); + FilePointer two = new FilePointer(IntervalMergingRule.ALL, genomeLocParser.createGenomeLoc("chr1",6,10)); two.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(1,2))); - FilePointer result = new FilePointer(genomeLocParser.createGenomeLoc("chr1",1,10)); + FilePointer result = new 
FilePointer(IntervalMergingRule.ALL, genomeLocParser.createGenomeLoc("chr1",1,10)); + result.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,2))); + + Assert.assertEquals(one.combine(genomeLocParser,two),result,"Combination of two file pointers is incorrect"); + Assert.assertEquals(two.combine(genomeLocParser,one),result,"Combination of two file pointers is incorrect"); + + //Now test that adjacent (but disjoint) intervals are properly handled with OVERLAPPING_ONLY + one = new FilePointer(IntervalMergingRule.OVERLAPPING_ONLY, genomeLocParser.createGenomeLoc("chr1",1,5)); + one.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,1))); + two = new FilePointer(IntervalMergingRule.OVERLAPPING_ONLY, genomeLocParser.createGenomeLoc("chr1",6,10)); + two.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(1,2))); + + result = new FilePointer(IntervalMergingRule.OVERLAPPING_ONLY, + genomeLocParser.createGenomeLoc("chr1",1,5), + genomeLocParser.createGenomeLoc("chr1",6,10)); result.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,2))); Assert.assertEquals(one.combine(genomeLocParser,two),result,"Combination of two file pointers is incorrect"); @@ -75,26 +90,38 @@ public class FilePointerUnitTest extends BaseTest { @Test public void testFilePointerCombineJoint() { - FilePointer one = new FilePointer(genomeLocParser.createGenomeLoc("chr1",1,5)); + FilePointer one = new FilePointer(IntervalMergingRule.ALL, genomeLocParser.createGenomeLoc("chr1",1,5)); one.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,2))); - FilePointer two = new FilePointer(genomeLocParser.createGenomeLoc("chr1",2,6)); + FilePointer two = new FilePointer(IntervalMergingRule.ALL, genomeLocParser.createGenomeLoc("chr1",2,6)); two.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(1,3))); - FilePointer result = new FilePointer(genomeLocParser.createGenomeLoc("chr1",1,6)); + FilePointer result = new FilePointer(IntervalMergingRule.ALL, 
genomeLocParser.createGenomeLoc("chr1",1,6)); result.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,3))); Assert.assertEquals(one.combine(genomeLocParser,two),result,"Combination of two file pointers is incorrect"); Assert.assertEquals(two.combine(genomeLocParser,one),result,"Combination of two file pointers is incorrect"); + + //Repeat the tests for OVERLAPPING_ONLY + one = new FilePointer(IntervalMergingRule.OVERLAPPING_ONLY, genomeLocParser.createGenomeLoc("chr1",1,5)); + one.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,2))); + two = new FilePointer(IntervalMergingRule.OVERLAPPING_ONLY, genomeLocParser.createGenomeLoc("chr1",2,6)); + two.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(1,3))); + + result = new FilePointer(IntervalMergingRule.OVERLAPPING_ONLY, genomeLocParser.createGenomeLoc("chr1",1,6)); + result.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,3))); + + Assert.assertEquals(one.combine(genomeLocParser,two),result,"Combination of two file pointers is incorrect"); + Assert.assertEquals(two.combine(genomeLocParser,one),result,"Combination of two file pointers is incorrect"); } @Test public void testFilePointerCombineOneSided() { - FilePointer filePointer = new FilePointer(genomeLocParser.createGenomeLoc("chr1",1,5)); + FilePointer filePointer = new FilePointer(IntervalMergingRule.ALL, genomeLocParser.createGenomeLoc("chr1",1,5)); filePointer.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,1))); - FilePointer empty = new FilePointer(genomeLocParser.createGenomeLoc("chr1",6,10)); + FilePointer empty = new FilePointer(IntervalMergingRule.ALL, genomeLocParser.createGenomeLoc("chr1",6,10)); // Do not add file spans to empty result - FilePointer result = new FilePointer(genomeLocParser.createGenomeLoc("chr1",1,10)); + FilePointer result = new FilePointer(IntervalMergingRule.ALL, genomeLocParser.createGenomeLoc("chr1",1,10)); result.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,1))); 
Assert.assertEquals(filePointer.combine(genomeLocParser,empty),result,"Combination of two file pointers is incorrect"); Assert.assertEquals(empty.combine(genomeLocParser,filePointer),result,"Combination of two file pointers is incorrect"); diff --git a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java index 280e48679..afadfd4a7 100644 --- a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java +++ b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java @@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -184,7 +185,7 @@ public class SAMDataSourceUnitTest extends BaseTest { (byte) -1, removeProgramRecords, false, - null); + null, IntervalMergingRule.ALL); List dontRemoveProgramRecords = data.getHeader().getProgramRecords(); assertEquals(dontRemoveProgramRecords, defaultProgramRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false"); @@ -205,7 +206,7 @@ public class SAMDataSourceUnitTest extends BaseTest { (byte) -1, removeProgramRecords, false, - null); + null, IntervalMergingRule.ALL); List doRemoveProgramRecords = data.getHeader().getProgramRecords(); assertTrue(doRemoveProgramRecords.isEmpty(), "testRemoveProgramRecords: program records not cleared when removeProgramRecords = true"); @@ -247,6 +248,6 @@ public class SAMDataSourceUnitTest 
extends BaseTest { (byte) -1, true, false, - null); + null, IntervalMergingRule.ALL); } } diff --git a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java index 30c0c83b5..7fa27701e 100644 --- a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java +++ b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java @@ -481,7 +481,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { new ValidationExclusion(), new ArrayList(), new ArrayList(), - false, (byte)30, false, true, null); + false, (byte)30, false, true, null, IntervalMergingRule.ALL); engine.setReadsDataSource(dataSource); final Set samples = SampleUtils.getSAMFileSamples(dataSource.getHeader()); diff --git a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java index 003ab6cf9..ebaec3396 100644 --- a/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java +++ b/public/gatk-framework/src/test/java/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java @@ -120,6 +120,28 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest { execute("testNoCoverageDueToFiltering",spec); } + @Test + public void testAdjacentIntervals() { + String[] intervals = {"chr1:1-999", "chr1:1000-65536", "chr1:65537-80000", "chr1:80001-81000"}; + String[] bams = {publicTestDir+"exampleBAM.bam"}; + + String cmd = buildRootCmd(exampleFASTA, new ArrayList(Arrays.asList(bams)), new ArrayList(Arrays.asList(intervals))) + " -im OVERLAPPING_ONLY"; + WalkerTestSpec spec = 
new WalkerTestSpec(cmd, 0, new ArrayList()); + + File baseOutputFile = WalkerTest.createTempFile("depthofcoverageadjinterval", ".tmp"); + spec.setOutputFileLocation(baseOutputFile); + + spec.addAuxFile("84b95d62f53e28919d1b5286558a1cae", baseOutputFile); + spec.addAuxFile("e445d4529dd3e3caa486ab8f5ec63e49", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts")); + spec.addAuxFile("b69c89ba8b0c393b735616c2bc3aea76", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions")); + spec.addAuxFile("788988dac6119a02de2c8d4dfb06b727", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics")); + spec.addAuxFile("3769ed40ab3ccd2ed94a9dc05cc2bc2f", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary")); + spec.addAuxFile("1281605e022d7462fbbcd14de53d1ca3", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics")); + spec.addAuxFile("4b41d6ff88aa2662697cb7e4b5346cb8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary")); + + execute("testAdjacentIntervals", spec); + } + public void testRefNHandling(boolean includeNs, final String md5) { String command = "-R " + b37KGReference + " -L 20:26,319,565-26,319,575 -I " + validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -T DepthOfCoverage -baseCounts --omitIntervalStatistics --omitLocusTable --omitPerSampleStats -o %s"; if ( includeNs ) command += " --includeRefNSites";