Made IntervalSharder respect the IntervalMergingRule specified on the command line
* This addresses PT Bug 69741902
* Added a required IntervalMergingRule (IMR) argument to FilePointer, BAMScheduler, IntervalSharder, and SAMDataSource
* This rule is used by FilePointer.combine and FilePointer.union
* Added unit and integration tests
This commit is contained in:
parent
4ce09d8693
commit
7a2b70a10f
|
|
@ -888,7 +888,8 @@ public class GenomeAnalysisEngine {
|
|||
argCollection.defaultBaseQualities,
|
||||
removeProgramRecords,
|
||||
keepReadsInLIBS,
|
||||
sampleRenameMap);
|
||||
sampleRenameMap,
|
||||
argCollection.intervalArguments.intervalMerging);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -51,6 +52,7 @@ public class BAMScheduler implements Iterator<FilePointer> {
|
|||
private GenomeLocSortedSet loci;
|
||||
private PeekableIterator<GenomeLoc> locusIterator;
|
||||
private GenomeLoc currentLocus;
|
||||
private IntervalMergingRule intervalMergingRule;
|
||||
|
||||
/*
|
||||
* Creates BAMScheduler using contigs from the given BAM data source.
|
||||
|
|
@ -59,27 +61,28 @@ public class BAMScheduler implements Iterator<FilePointer> {
|
|||
* @return non-null BAM scheduler
|
||||
*/
|
||||
public static BAMScheduler createOverMappedReads(final SAMDataSource dataSource) {
|
||||
final BAMScheduler scheduler = new BAMScheduler(dataSource);
|
||||
final BAMScheduler scheduler = new BAMScheduler(dataSource, IntervalMergingRule.ALL);
|
||||
final GenomeLocSortedSet intervals = GenomeLocSortedSet.createSetFromSequenceDictionary(dataSource.getHeader().getSequenceDictionary());
|
||||
scheduler.populateFilteredIntervalList(intervals);
|
||||
return scheduler;
|
||||
}
|
||||
|
||||
public static BAMScheduler createOverAllReads(final SAMDataSource dataSource, final GenomeLocParser parser) {
|
||||
BAMScheduler scheduler = new BAMScheduler(dataSource);
|
||||
BAMScheduler scheduler = new BAMScheduler(dataSource, IntervalMergingRule.ALL);
|
||||
scheduler.populateUnfilteredIntervalList(parser);
|
||||
return scheduler;
|
||||
}
|
||||
|
||||
public static BAMScheduler createOverIntervals(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
|
||||
BAMScheduler scheduler = new BAMScheduler(dataSource);
|
||||
public static BAMScheduler createOverIntervals(final SAMDataSource dataSource, final IntervalMergingRule mergeRule, final GenomeLocSortedSet loci) {
|
||||
BAMScheduler scheduler = new BAMScheduler(dataSource, mergeRule);
|
||||
scheduler.populateFilteredIntervalList(loci);
|
||||
return scheduler;
|
||||
}
|
||||
|
||||
|
||||
private BAMScheduler(final SAMDataSource dataSource) {
|
||||
private BAMScheduler(final SAMDataSource dataSource, final IntervalMergingRule mergeRule) {
|
||||
this.dataSource = dataSource;
|
||||
this.intervalMergingRule = mergeRule;
|
||||
for(SAMReaderID reader: dataSource.getReaderIDs()) {
|
||||
GATKBAMIndex index = dataSource.getIndex(reader);
|
||||
if(index != null)
|
||||
|
|
@ -124,7 +127,7 @@ public class BAMScheduler implements Iterator<FilePointer> {
|
|||
* @return A file pointer over the specified region.
|
||||
*/
|
||||
private FilePointer generatePointerOverEntireFileset() {
|
||||
FilePointer filePointer = new FilePointer();
|
||||
FilePointer filePointer = new FilePointer(intervalMergingRule);
|
||||
|
||||
// This is a "monolithic" FilePointer representing all regions in all files we will ever visit, and is
|
||||
// the only FilePointer we will create. This allows us to have this FilePointer represent regions from
|
||||
|
|
@ -165,14 +168,14 @@ public class BAMScheduler implements Iterator<FilePointer> {
|
|||
while(nextFilePointer == null && currentLocus != null) {
|
||||
// special case handling of the unmapped shard.
|
||||
if(currentLocus == GenomeLoc.UNMAPPED) {
|
||||
nextFilePointer = new FilePointer(GenomeLoc.UNMAPPED);
|
||||
nextFilePointer = new FilePointer(intervalMergingRule, GenomeLoc.UNMAPPED);
|
||||
for(SAMReaderID id: dataSource.getReaderIDs())
|
||||
nextFilePointer.addFileSpans(id,createSpanToEndOfFile(indexFiles.get(id).getStartOfLastLinearBin()));
|
||||
currentLocus = null;
|
||||
continue;
|
||||
}
|
||||
|
||||
nextFilePointer = new FilePointer();
|
||||
nextFilePointer = new FilePointer(intervalMergingRule);
|
||||
|
||||
int coveredRegionStart = 1;
|
||||
int coveredRegionStop = Integer.MAX_VALUE;
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ import java.util.*;
|
|||
public class FilePointer {
|
||||
protected final SortedMap<SAMReaderID,SAMFileSpan> fileSpans = new TreeMap<SAMReaderID,SAMFileSpan>();
|
||||
protected final List<GenomeLoc> locations = new ArrayList<GenomeLoc>();
|
||||
protected final IntervalMergingRule intervalMergingRule;
|
||||
|
||||
/**
|
||||
* Does this file pointer point into an unmapped region?
|
||||
|
|
@ -65,7 +66,8 @@ public class FilePointer {
|
|||
private Integer contigIndex = null;
|
||||
|
||||
|
||||
public FilePointer( List<GenomeLoc> locations ) {
|
||||
public FilePointer( final IntervalMergingRule mergeRule, final List<GenomeLoc> locations ) {
|
||||
this.intervalMergingRule = mergeRule;
|
||||
this.locations.addAll(locations);
|
||||
this.isRegionUnmapped = checkUnmappedStatus();
|
||||
|
||||
|
|
@ -75,12 +77,12 @@ public class FilePointer {
|
|||
}
|
||||
}
|
||||
|
||||
public FilePointer( final GenomeLoc... locations ) {
|
||||
this(Arrays.asList(locations));
|
||||
public FilePointer( final IntervalMergingRule mergeRule, final GenomeLoc... locations ) {
|
||||
this(mergeRule, Arrays.asList(locations));
|
||||
}
|
||||
|
||||
public FilePointer( Map<SAMReaderID,SAMFileSpan> fileSpans, List<GenomeLoc> locations ) {
|
||||
this(locations);
|
||||
public FilePointer( final Map<SAMReaderID,SAMFileSpan> fileSpans, final IntervalMergingRule mergeRule, final List<GenomeLoc> locations ) {
|
||||
this(mergeRule, locations);
|
||||
this.fileSpans.putAll(fileSpans);
|
||||
}
|
||||
|
||||
|
|
@ -152,6 +154,15 @@ public class FilePointer {
|
|||
return locations.size() > 0 ? locations.get(0).getContigIndex() : SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the IntervalMergingRule used by this FilePointer to merge adjacent locations
|
||||
*
|
||||
* @return the IntervalMergingRule used by this FilePointer (never null)
|
||||
*/
|
||||
public IntervalMergingRule getIntervalMergingRule() {
|
||||
return intervalMergingRule;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this FilePointer "monolithic"? That is, does it represent all regions in all files that we will
|
||||
* ever visit during this GATK run? If this is set to true, the engine will expect to see only this
|
||||
|
|
@ -277,12 +288,12 @@ public class FilePointer {
|
|||
* @return A completely new file pointer that is the combination of the two.
|
||||
*/
|
||||
public FilePointer combine(final GenomeLocParser parser, final FilePointer other) {
|
||||
FilePointer combined = new FilePointer();
|
||||
FilePointer combined = new FilePointer(intervalMergingRule);
|
||||
|
||||
List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
|
||||
intervals.addAll(locations);
|
||||
intervals.addAll(other.locations);
|
||||
for(GenomeLoc interval: IntervalUtils.sortAndMergeIntervals(parser,intervals,IntervalMergingRule.ALL))
|
||||
for(GenomeLoc interval: IntervalUtils.sortAndMergeIntervals(parser,intervals,intervalMergingRule))
|
||||
combined.addLocation(interval);
|
||||
|
||||
PeekableIterator<Map.Entry<SAMReaderID,SAMFileSpan>> thisIterator = new PeekableIterator<Map.Entry<SAMReaderID,SAMFileSpan>>(this.fileSpans.entrySet().iterator());
|
||||
|
|
@ -340,15 +351,18 @@ public class FilePointer {
|
|||
*/
|
||||
public static FilePointer union( List<FilePointer> filePointers, GenomeLocParser parser ) {
|
||||
if ( filePointers == null || filePointers.isEmpty() ) {
|
||||
return new FilePointer();
|
||||
return new FilePointer(IntervalMergingRule.ALL);
|
||||
}
|
||||
|
||||
Map<SAMReaderID, List<GATKChunk>> fileChunks = new HashMap<SAMReaderID, List<GATKChunk>>();
|
||||
List<GenomeLoc> locations = new ArrayList<GenomeLoc>();
|
||||
IntervalMergingRule mergeRule = filePointers.get(0).getIntervalMergingRule();
|
||||
|
||||
// First extract all intervals and file chunks from the FilePointers into unsorted, unmerged collections
|
||||
for ( FilePointer filePointer : filePointers ) {
|
||||
locations.addAll(filePointer.getLocations());
|
||||
if (mergeRule != filePointer.getIntervalMergingRule())
|
||||
throw new ReviewedStingException("All FilePointers in FilePointer.union() must have use the same IntervalMergeRule");
|
||||
|
||||
for ( Map.Entry<SAMReaderID, SAMFileSpan> fileSpanEntry : filePointer.getFileSpans().entrySet() ) {
|
||||
GATKBAMFileSpan fileSpan = (GATKBAMFileSpan)fileSpanEntry.getValue();
|
||||
|
|
@ -364,7 +378,7 @@ public class FilePointer {
|
|||
|
||||
// Now sort and merge the intervals
|
||||
List<GenomeLoc> sortedMergedLocations = new ArrayList<GenomeLoc>();
|
||||
sortedMergedLocations.addAll(IntervalUtils.sortAndMergeIntervals(parser, locations, IntervalMergingRule.ALL));
|
||||
sortedMergedLocations.addAll(IntervalUtils.sortAndMergeIntervals(parser, locations, mergeRule));
|
||||
|
||||
// For each BAM file, convert from an unsorted, unmerged list of chunks to a GATKBAMFileSpan containing
|
||||
// the sorted, merged union of the chunks for that file
|
||||
|
|
@ -375,7 +389,7 @@ public class FilePointer {
|
|||
(new GATKBAMFileSpan(unmergedChunks.toArray(new GATKChunk[unmergedChunks.size()]))).union(new GATKBAMFileSpan()));
|
||||
}
|
||||
|
||||
return new FilePointer(mergedFileSpans, sortedMergedLocations);
|
||||
return new FilePointer(mergedFileSpans, mergeRule, sortedMergedLocations);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.datasources.reads;
|
|||
import net.sf.picard.util.PeekableIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
|
|
@ -54,8 +55,8 @@ public class IntervalSharder implements Iterator<FilePointer> {
|
|||
return new IntervalSharder(BAMScheduler.createOverMappedReads(dataSource),parser);
|
||||
}
|
||||
|
||||
public static IntervalSharder shardOverIntervals(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
|
||||
return new IntervalSharder(BAMScheduler.createOverIntervals(dataSource,loci),loci.getGenomeLocParser());
|
||||
public static IntervalSharder shardOverIntervals(final SAMDataSource dataSource, final GenomeLocSortedSet loci, final IntervalMergingRule intervalMergeRule) {
|
||||
return new IntervalSharder(BAMScheduler.createOverIntervals(dataSource,intervalMergeRule,loci),loci.getGenomeLocParser());
|
||||
}
|
||||
|
||||
private IntervalSharder(final BAMScheduler scheduler, final GenomeLocParser parser) {
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ import org.broadinstitute.sting.utils.SimpleTimer;
|
|||
import org.broadinstitute.sting.utils.baq.ReadTransformingIterator;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
|
||||
|
||||
|
|
@ -154,6 +155,11 @@ public class SAMDataSource {
|
|||
*/
|
||||
private final ThreadAllocation threadAllocation;
|
||||
|
||||
/**
|
||||
* How are adjacent intervals merged by the sharder?
|
||||
*/
|
||||
private final IntervalMergingRule intervalMergingRule;
|
||||
|
||||
/**
|
||||
* Static set of unsupported programs that create bam files.
|
||||
* The key is the PG record ID and the value is the name of the tool that created it
|
||||
|
|
@ -217,7 +223,8 @@ public class SAMDataSource {
|
|||
(byte) -1,
|
||||
false,
|
||||
false,
|
||||
null);
|
||||
null,
|
||||
IntervalMergingRule.ALL);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -236,6 +243,7 @@ public class SAMDataSource {
|
|||
* @param keepReadsInLIBS should we keep a unique list of reads in LIBS?
|
||||
* @param sampleRenameMap Map of BAM file to new sample ID used during on-the-fly runtime sample renaming.
|
||||
* Will be null if we're not doing sample renaming.
|
||||
* @param intervalMergingRule how are adjacent intervals merged by the sharder
|
||||
*/
|
||||
public SAMDataSource(
|
||||
Collection<SAMReaderID> samFiles,
|
||||
|
|
@ -253,10 +261,12 @@ public class SAMDataSource {
|
|||
byte defaultBaseQualities,
|
||||
boolean removeProgramRecords,
|
||||
final boolean keepReadsInLIBS,
|
||||
final Map<String, String> sampleRenameMap) {
|
||||
final Map<String, String> sampleRenameMap,
|
||||
final IntervalMergingRule intervalMergingRule) {
|
||||
|
||||
this.readMetrics = new ReadMetrics();
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.intervalMergingRule = intervalMergingRule;
|
||||
|
||||
readerIDs = samFiles;
|
||||
|
||||
|
|
@ -1182,7 +1192,7 @@ public class SAMDataSource {
|
|||
public Iterable<Shard> createShardIteratorOverIntervals(final GenomeLocSortedSet intervals,final ShardBalancer shardBalancer) {
|
||||
if(intervals == null)
|
||||
throw new ReviewedStingException("Unable to create schedule from intervals; no intervals were provided.");
|
||||
shardBalancer.initialize(this,IntervalSharder.shardOverIntervals(SAMDataSource.this,intervals),genomeLocParser);
|
||||
shardBalancer.initialize(this,IntervalSharder.shardOverIntervals(SAMDataSource.this,intervals,intervalMergingRule),genomeLocParser);
|
||||
return shardBalancer;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ public class FindLargeShards extends CommandLineProgram {
|
|||
|
||||
logger.info(String.format("PROGRESS: Calculating mean and variance: Contig\tRegion.Start\tRegion.Stop\tSize"));
|
||||
|
||||
IntervalSharder sharder = IntervalSharder.shardOverIntervals(dataSource,intervalSortedSet);
|
||||
IntervalSharder sharder = IntervalSharder.shardOverIntervals(dataSource,intervalSortedSet,IntervalMergingRule.ALL);
|
||||
while(sharder.hasNext()) {
|
||||
FilePointer filePointer = sharder.next();
|
||||
|
||||
|
|
@ -133,7 +133,7 @@ public class FindLargeShards extends CommandLineProgram {
|
|||
logger.warn(String.format("PROGRESS: Searching for large shards: Contig\tRegion.Start\tRegion.Stop\tSize"));
|
||||
out.printf("Contig\tRegion.Start\tRegion.Stop\tSize%n");
|
||||
|
||||
sharder = IntervalSharder.shardOverIntervals(dataSource,intervalSortedSet);
|
||||
sharder = IntervalSharder.shardOverIntervals(dataSource,intervalSortedSet,IntervalMergingRule.ALL);
|
||||
while(sharder.hasNext()) {
|
||||
FilePointer filePointer = sharder.next();
|
||||
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ import org.broadinstitute.sting.utils.SampleUtils;
|
|||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileState;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.sam.*;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
|
|
@ -158,7 +159,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
|||
new ValidationExclusion(),
|
||||
new ArrayList<ReadFilter>(),
|
||||
new ArrayList<ReadTransformer>(),
|
||||
false, (byte)30, false, true, null);
|
||||
false, (byte)30, false, true, null, IntervalMergingRule.ALL);
|
||||
|
||||
engine.setReadsDataSource(dataSource);
|
||||
|
||||
|
|
@ -193,7 +194,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
|||
new ValidationExclusion(),
|
||||
new ArrayList<ReadFilter>(),
|
||||
new ArrayList<ReadTransformer>(),
|
||||
false, (byte)30, false, true, null);
|
||||
false, (byte)30, false, true, null, IntervalMergingRule.ALL);
|
||||
|
||||
engine.setReadsDataSource(dataSource);
|
||||
final Set<String> samples = SampleUtils.getSAMFileSamples(dataSource.getHeader());
|
||||
|
|
@ -234,7 +235,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
|||
new ValidationExclusion(),
|
||||
new ArrayList<ReadFilter>(),
|
||||
new ArrayList<ReadTransformer>(),
|
||||
false, (byte)30, false, true, null);
|
||||
false, (byte)30, false, true, null, IntervalMergingRule.ALL);
|
||||
|
||||
engine.setReadsDataSource(dataSource);
|
||||
final Set<String> samples = SampleUtils.getSAMFileSamples(dataSource.getHeader());
|
||||
|
|
@ -281,7 +282,7 @@ public class ReadMetricsUnitTest extends BaseTest {
|
|||
new ValidationExclusion(),
|
||||
filters,
|
||||
new ArrayList<ReadTransformer>(),
|
||||
false, (byte)30, false, true, null);
|
||||
false, (byte)30, false, true, null, IntervalMergingRule.ALL);
|
||||
|
||||
engine.setReadsDataSource(dataSource);
|
||||
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import net.sf.samtools.SAMSequenceRecord;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
|
|
@ -82,7 +83,7 @@ public class ActiveRegionShardBalancerUnitTest extends BaseTest {
|
|||
end = myEnd;
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(record.getSequenceName(), i, myEnd);
|
||||
final Map<SAMReaderID, SAMFileSpan> fileSpans = Collections.emptyMap();
|
||||
final FilePointer fp = new FilePointer(fileSpans, Collections.singletonList(loc));
|
||||
final FilePointer fp = new FilePointer(fileSpans, IntervalMergingRule.ALL, Collections.singletonList(loc));
|
||||
pointers.add(fp);
|
||||
}
|
||||
expectedLocs.add(Collections.singleton(genomeLocParser.createGenomeLoc(record.getSequenceName(), 0, end)));
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.BaseTest;
|
|||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.Test;
|
||||
|
|
@ -61,12 +62,26 @@ public class FilePointerUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testFilePointerCombineDisjoint() {
|
||||
FilePointer one = new FilePointer(genomeLocParser.createGenomeLoc("chr1",1,5));
|
||||
FilePointer one = new FilePointer(IntervalMergingRule.ALL, genomeLocParser.createGenomeLoc("chr1",1,5));
|
||||
one.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,1)));
|
||||
FilePointer two = new FilePointer(genomeLocParser.createGenomeLoc("chr1",6,10));
|
||||
FilePointer two = new FilePointer(IntervalMergingRule.ALL, genomeLocParser.createGenomeLoc("chr1",6,10));
|
||||
two.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(1,2)));
|
||||
|
||||
FilePointer result = new FilePointer(genomeLocParser.createGenomeLoc("chr1",1,10));
|
||||
FilePointer result = new FilePointer(IntervalMergingRule.ALL, genomeLocParser.createGenomeLoc("chr1",1,10));
|
||||
result.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,2)));
|
||||
|
||||
Assert.assertEquals(one.combine(genomeLocParser,two),result,"Combination of two file pointers is incorrect");
|
||||
Assert.assertEquals(two.combine(genomeLocParser,one),result,"Combination of two file pointers is incorrect");
|
||||
|
||||
//Now test that adjacent (but disjoint) intervals are properly handled with OVERLAPPING_ONLY
|
||||
one = new FilePointer(IntervalMergingRule.OVERLAPPING_ONLY, genomeLocParser.createGenomeLoc("chr1",1,5));
|
||||
one.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,1)));
|
||||
two = new FilePointer(IntervalMergingRule.OVERLAPPING_ONLY, genomeLocParser.createGenomeLoc("chr1",6,10));
|
||||
two.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(1,2)));
|
||||
|
||||
result = new FilePointer(IntervalMergingRule.OVERLAPPING_ONLY,
|
||||
genomeLocParser.createGenomeLoc("chr1",1,5),
|
||||
genomeLocParser.createGenomeLoc("chr1",6,10));
|
||||
result.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,2)));
|
||||
|
||||
Assert.assertEquals(one.combine(genomeLocParser,two),result,"Combination of two file pointers is incorrect");
|
||||
|
|
@ -75,26 +90,38 @@ public class FilePointerUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testFilePointerCombineJoint() {
|
||||
FilePointer one = new FilePointer(genomeLocParser.createGenomeLoc("chr1",1,5));
|
||||
FilePointer one = new FilePointer(IntervalMergingRule.ALL, genomeLocParser.createGenomeLoc("chr1",1,5));
|
||||
one.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,2)));
|
||||
FilePointer two = new FilePointer(genomeLocParser.createGenomeLoc("chr1",2,6));
|
||||
FilePointer two = new FilePointer(IntervalMergingRule.ALL, genomeLocParser.createGenomeLoc("chr1",2,6));
|
||||
two.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(1,3)));
|
||||
|
||||
FilePointer result = new FilePointer(genomeLocParser.createGenomeLoc("chr1",1,6));
|
||||
FilePointer result = new FilePointer(IntervalMergingRule.ALL, genomeLocParser.createGenomeLoc("chr1",1,6));
|
||||
result.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,3)));
|
||||
|
||||
Assert.assertEquals(one.combine(genomeLocParser,two),result,"Combination of two file pointers is incorrect");
|
||||
Assert.assertEquals(two.combine(genomeLocParser,one),result,"Combination of two file pointers is incorrect");
|
||||
|
||||
//Repeat the tests for OVERLAPPING_ONLY
|
||||
one = new FilePointer(IntervalMergingRule.OVERLAPPING_ONLY, genomeLocParser.createGenomeLoc("chr1",1,5));
|
||||
one.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,2)));
|
||||
two = new FilePointer(IntervalMergingRule.OVERLAPPING_ONLY, genomeLocParser.createGenomeLoc("chr1",2,6));
|
||||
two.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(1,3)));
|
||||
|
||||
result = new FilePointer(IntervalMergingRule.OVERLAPPING_ONLY, genomeLocParser.createGenomeLoc("chr1",1,6));
|
||||
result.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,3)));
|
||||
|
||||
Assert.assertEquals(one.combine(genomeLocParser,two),result,"Combination of two file pointers is incorrect");
|
||||
Assert.assertEquals(two.combine(genomeLocParser,one),result,"Combination of two file pointers is incorrect");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFilePointerCombineOneSided() {
|
||||
FilePointer filePointer = new FilePointer(genomeLocParser.createGenomeLoc("chr1",1,5));
|
||||
FilePointer filePointer = new FilePointer(IntervalMergingRule.ALL, genomeLocParser.createGenomeLoc("chr1",1,5));
|
||||
filePointer.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,1)));
|
||||
FilePointer empty = new FilePointer(genomeLocParser.createGenomeLoc("chr1",6,10));
|
||||
FilePointer empty = new FilePointer(IntervalMergingRule.ALL, genomeLocParser.createGenomeLoc("chr1",6,10));
|
||||
// Do not add file spans to empty result
|
||||
|
||||
FilePointer result = new FilePointer(genomeLocParser.createGenomeLoc("chr1",1,10));
|
||||
FilePointer result = new FilePointer(IntervalMergingRule.ALL, genomeLocParser.createGenomeLoc("chr1",1,10));
|
||||
result.addFileSpans(readerID,new GATKBAMFileSpan(new GATKChunk(0,1)));
|
||||
Assert.assertEquals(filePointer.combine(genomeLocParser,empty),result,"Combination of two file pointers is incorrect");
|
||||
Assert.assertEquals(empty.combine(genomeLocParser,filePointer),result,"Combination of two file pointers is incorrect");
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.testng.annotations.AfterMethod;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.Test;
|
||||
|
|
@ -184,7 +185,7 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
(byte) -1,
|
||||
removeProgramRecords,
|
||||
false,
|
||||
null);
|
||||
null, IntervalMergingRule.ALL);
|
||||
|
||||
List<SAMProgramRecord> dontRemoveProgramRecords = data.getHeader().getProgramRecords();
|
||||
assertEquals(dontRemoveProgramRecords, defaultProgramRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false");
|
||||
|
|
@ -205,7 +206,7 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
(byte) -1,
|
||||
removeProgramRecords,
|
||||
false,
|
||||
null);
|
||||
null, IntervalMergingRule.ALL);
|
||||
|
||||
List<SAMProgramRecord> doRemoveProgramRecords = data.getHeader().getProgramRecords();
|
||||
assertTrue(doRemoveProgramRecords.isEmpty(), "testRemoveProgramRecords: program records not cleared when removeProgramRecords = true");
|
||||
|
|
@ -247,6 +248,6 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
(byte) -1,
|
||||
true,
|
||||
false,
|
||||
null);
|
||||
null, IntervalMergingRule.ALL);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -481,7 +481,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
new ValidationExclusion(),
|
||||
new ArrayList<ReadFilter>(),
|
||||
new ArrayList<ReadTransformer>(),
|
||||
false, (byte)30, false, true, null);
|
||||
false, (byte)30, false, true, null, IntervalMergingRule.ALL);
|
||||
|
||||
engine.setReadsDataSource(dataSource);
|
||||
final Set<String> samples = SampleUtils.getSAMFileSamples(dataSource.getHeader());
|
||||
|
|
|
|||
|
|
@ -120,6 +120,28 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
|
|||
execute("testNoCoverageDueToFiltering",spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAdjacentIntervals() {
|
||||
String[] intervals = {"chr1:1-999", "chr1:1000-65536", "chr1:65537-80000", "chr1:80001-81000"};
|
||||
String[] bams = {publicTestDir+"exampleBAM.bam"};
|
||||
|
||||
String cmd = buildRootCmd(exampleFASTA, new ArrayList<String>(Arrays.asList(bams)), new ArrayList<String>(Arrays.asList(intervals))) + " -im OVERLAPPING_ONLY";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(cmd, 0, new ArrayList<String>());
|
||||
|
||||
File baseOutputFile = WalkerTest.createTempFile("depthofcoverageadjinterval", ".tmp");
|
||||
spec.setOutputFileLocation(baseOutputFile);
|
||||
|
||||
spec.addAuxFile("84b95d62f53e28919d1b5286558a1cae", baseOutputFile);
|
||||
spec.addAuxFile("e445d4529dd3e3caa486ab8f5ec63e49", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts"));
|
||||
spec.addAuxFile("b69c89ba8b0c393b735616c2bc3aea76", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions"));
|
||||
spec.addAuxFile("788988dac6119a02de2c8d4dfb06b727", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics"));
|
||||
spec.addAuxFile("3769ed40ab3ccd2ed94a9dc05cc2bc2f", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
|
||||
spec.addAuxFile("1281605e022d7462fbbcd14de53d1ca3", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
|
||||
spec.addAuxFile("4b41d6ff88aa2662697cb7e4b5346cb8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary"));
|
||||
|
||||
execute("testAdjacentIntervals", spec);
|
||||
}
|
||||
|
||||
public void testRefNHandling(boolean includeNs, final String md5) {
|
||||
String command = "-R " + b37KGReference + " -L 20:26,319,565-26,319,575 -I " + validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -T DepthOfCoverage -baseCounts --omitIntervalStatistics --omitLocusTable --omitPerSampleStats -o %s";
|
||||
if ( includeNs ) command += " --includeRefNSites";
|
||||
|
|
|
|||
Loading…
Reference in New Issue