Prototype implementation of protoshard merging when protoshard n and protoshard

n+1 completely overlap.  Gives a small but consistent performance increase in 
non-intervaled whole exome traversals (2.79min original, 2.69min revised). 
Needs a more in depth analysis of optimal shard sizing to determine a true
optimum.

Also renamed a variable because Khalid disapproved of my naming choices.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5595 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2011-04-08 02:09:14 +00:00
parent 32d502c122
commit fece2167b3
6 changed files with 349 additions and 193 deletions

View File

@ -190,7 +190,7 @@ public class GATKBAMFileSpan extends BAMFileSpan {
* @return This file span minuse the other file span.
*/
public GATKBAMFileSpan subtract(final GATKBAMFileSpan other) {
public GATKBAMFileSpan minus(final GATKBAMFileSpan other) {
Iterator<GATKChunk> thisIterator = getGATKChunks().iterator();
Iterator<GATKChunk> otherIterator = other.getGATKChunks().iterator();
@ -204,7 +204,7 @@ public class GATKBAMFileSpan extends BAMFileSpan {
while(thisChunk != null && otherChunk != null) {
// If this iterator is before the other, add this to the subtracted list and forge ahead.
if(thisChunk.getChunkEnd() < otherChunk.getChunkStart()) {
if(thisChunk.getChunkEnd() <= otherChunk.getChunkStart()) {
subtracted.add(thisChunk);
thisChunk = thisIterator.hasNext() ? thisIterator.next() : null;
continue;

View File

@ -0,0 +1,220 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.GATKBAMFileSpan;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
/**
* Assign intervals to the most appropriate blocks, keeping as little as possible in memory at once.
*/
public class BAMScheduler implements Iterator<FilePointer> {
private final SAMDataSource dataSource;
private final Map<SAMReaderID,GATKBAMIndex> indices = new HashMap<SAMReaderID,GATKBAMIndex>();
private FilePointer nextFilePointer = null;
private final GenomeLocSortedSet loci;
private final PeekableIterator<GenomeLoc> locusIterator;
private GenomeLoc currentLocus;
public BAMScheduler(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
this.dataSource = dataSource;
for(SAMReaderID reader: dataSource.getReaderIDs())
indices.put(reader,(GATKBAMIndex)dataSource.getIndex(reader));
this.loci = loci;
locusIterator = new PeekableIterator<GenomeLoc>(loci.iterator());
if(locusIterator.hasNext())
currentLocus = locusIterator.next();
advance();
}
public boolean hasNext() {
return nextFilePointer != null;
}
public FilePointer next() {
if(!hasNext())
throw new NoSuchElementException("No next element available in interval sharder");
FilePointer currentFilePointer = nextFilePointer;
advance();
return currentFilePointer;
}
public void remove() {
throw new UnsupportedOperationException("Unable to remove FilePointers from an IntervalSharder");
}
private void advance() {
if(loci.isEmpty())
return;
nextFilePointer = null;
while(nextFilePointer == null && currentLocus != null) {
// special case handling of the unmapped shard.
if(currentLocus == GenomeLoc.UNMAPPED) {
nextFilePointer = new FilePointer(GenomeLoc.UNMAPPED);
for(SAMReaderID id: dataSource.getReaderIDs())
nextFilePointer.addFileSpans(id,null);
currentLocus = null;
continue;
}
nextFilePointer = new FilePointer();
int coveredRegionStart = 1;
int coveredRegionStop = Integer.MAX_VALUE;
GenomeLoc coveredRegion = null;
BAMScheduleEntry scheduleEntry = getNextOverlappingBAMScheduleEntry(indices,currentLocus);
// No overlapping data at all.
if(scheduleEntry != null) {
coveredRegionStart = Math.max(coveredRegionStart,scheduleEntry.start);
coveredRegionStop = Math.min(coveredRegionStop,scheduleEntry.stop);
coveredRegion = loci.getGenomeLocParser().createGenomeLoc(currentLocus.getContig(),coveredRegionStart,coveredRegionStop);
nextFilePointer.addFileSpans(scheduleEntry.fileSpans);
}
else {
// Always create a file span, whether there was covered data or not. If there was no covered data, then the binTree is empty.
//System.out.printf("Shard: index file = %s; reference sequence = %d; ",index.getIndexFile(),currentLocus.getContigIndex());
for(SAMReaderID reader: indices.keySet())
nextFilePointer.addFileSpans(reader,new GATKBAMFileSpan());
}
// Early exit if no bins were found.
if(coveredRegion == null) {
// for debugging only: maximum split is 16384.
if(currentLocus.size() > 16384) {
GenomeLoc[] splitContigs = currentLocus.split(currentLocus.getStart()+16384);
nextFilePointer.addLocation(splitContigs[0]);
currentLocus = splitContigs[1];
}
else {
nextFilePointer.addLocation(currentLocus);
currentLocus = locusIterator.hasNext() ? locusIterator.next() : null;
}
continue;
}
// Early exit if only part of the first interval was found.
if(currentLocus.startsBefore(coveredRegion)) {
// for debugging only: maximum split is 16384.
int splitPoint = Math.min(coveredRegion.getStart()-currentLocus.getStart(),16384)+currentLocus.getStart();
GenomeLoc[] splitContigs = currentLocus.split(splitPoint);
nextFilePointer.addLocation(splitContigs[0]);
currentLocus = splitContigs[1];
continue;
}
// Define the initial range of the file pointer, aka the region where the locus currently being processed intersects the BAM list.
GenomeLoc initialLocation = currentLocus.intersect(coveredRegion);
nextFilePointer.addLocation(initialLocation);
// See whether the BAM regions discovered overlap the next set of intervals in the interval list. If so, include every overlapping interval.
if(!nextFilePointer.locations.isEmpty()) {
while(locusIterator.hasNext() && locusIterator.peek().overlapsP(coveredRegion)) {
currentLocus = locusIterator.next();
nextFilePointer.addLocation(currentLocus.intersect(coveredRegion));
}
// Chop off the uncovered portion of the locus. Since we know that the covered region overlaps the current locus,
// we can simplify the interval creation process to the end of the covered region to the stop of the given interval.
if(coveredRegionStop < currentLocus.getStop())
currentLocus = loci.getGenomeLocParser().createGenomeLoc(currentLocus.getContig(),coveredRegionStop+1,currentLocus.getStop());
else if(locusIterator.hasNext())
currentLocus = locusIterator.next();
else
currentLocus = null;
}
}
}
/**
* The last reference sequence processed by this iterator.
*/
private Integer lastReferenceSequenceLoaded = null;
/**
* The stateful iterator used to progress through the genoem.
*/
private PeekableIterator<BAMScheduleEntry> bamScheduleIterator = null;
/**
* Get the next overlapping tree of bins associated with the given BAM file.
* @param indices BAM index representation.
* @param currentLocus The actual locus for which to check overlap.
* @return The next schedule entry overlapping with the given list of loci.
*/
private BAMScheduleEntry getNextOverlappingBAMScheduleEntry(final Map<SAMReaderID,GATKBAMIndex> indices, final GenomeLoc currentLocus) {
// Stale reference sequence or first invocation. (Re)create the binTreeIterator.
if(lastReferenceSequenceLoaded == null || lastReferenceSequenceLoaded != currentLocus.getContigIndex()) {
if(bamScheduleIterator != null)
bamScheduleIterator.close();
lastReferenceSequenceLoaded = currentLocus.getContigIndex();
// Naive algorithm: find all elements in current contig for proper schedule creation.
List<GenomeLoc> lociInContig = new LinkedList<GenomeLoc>();
for(GenomeLoc locus: loci) {
if(locus.getContigIndex() == lastReferenceSequenceLoaded)
lociInContig.add(locus);
}
bamScheduleIterator = new PeekableIterator<BAMScheduleEntry>(new BAMSchedule(indices,lociInContig));
}
if(!bamScheduleIterator.hasNext())
return null;
// Peek the iterator along until finding the first binTree at or following the current locus.
BAMScheduleEntry bamScheduleEntry = bamScheduleIterator.peek();
while(bamScheduleEntry != null && bamScheduleEntry.isBefore(currentLocus)) {
bamScheduleIterator.next();
bamScheduleEntry = bamScheduleIterator.hasNext() ? bamScheduleIterator.peek() : null;
}
return (bamScheduleEntry != null && bamScheduleEntry.overlaps(currentLocus)) ? bamScheduleEntry : null;
}
}

View File

@ -24,22 +24,29 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.GATKBAMFileSpan;
import net.sf.samtools.SAMFileSpan;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
/**
* Represents a small section of a BAM file, and every associated interval.
*/
class FilePointer {
protected final Map<SAMReaderID,SAMFileSpan> fileSpans = new HashMap<SAMReaderID,SAMFileSpan>();
protected final String referenceSequence;
protected final SortedMap<SAMReaderID,SAMFileSpan> fileSpans = new TreeMap<SAMReaderID,SAMFileSpan>();
protected final BAMOverlap overlap;
protected final List<GenomeLoc> locations;
@ -48,24 +55,22 @@ class FilePointer {
*/
protected final boolean isRegionUnmapped;
public FilePointer() {
this((BAMOverlap)null);
}
public FilePointer(final GenomeLoc location) {
this.referenceSequence = location.getContig();
this.overlap = null;
this.locations = Collections.singletonList(location);
this.isRegionUnmapped = GenomeLoc.isUnmapped(location);
}
public FilePointer(final String referenceSequence,final BAMOverlap overlap) {
this.referenceSequence = referenceSequence;
public FilePointer(final BAMOverlap overlap) {
this.overlap = overlap;
this.locations = new ArrayList<GenomeLoc>();
this.isRegionUnmapped = false;
}
public FilePointer(final String referenceSequence) {
this(referenceSequence,null);
}
public void addLocation(final GenomeLoc location) {
locations.add(location);
}
@ -77,4 +82,89 @@ class FilePointer {
public void addFileSpans(final Map<SAMReaderID, GATKBAMFileSpan> fileSpans) {
this.fileSpans.putAll(fileSpans);
}
/**
* Computes the size of this file span, in uncompressed bytes.
* @return Size of the file span.
*/
public long size() {
long size = 0L;
for(SAMFileSpan fileSpan: fileSpans.values())
size += ((GATKBAMFileSpan)fileSpan).size();
return size;
}
/**
* Returns the difference in size between two filespans.
* @param other Other filespan against which to measure.
* @return The difference in size between the two file pointers.
*/
public long minus(final FilePointer other) {
long difference = 0;
PeekableIterator<Map.Entry<SAMReaderID,SAMFileSpan>> thisIterator = new PeekableIterator<Map.Entry<SAMReaderID,SAMFileSpan>>(this.fileSpans.entrySet().iterator());
PeekableIterator<Map.Entry<SAMReaderID,SAMFileSpan>> otherIterator = new PeekableIterator<Map.Entry<SAMReaderID,SAMFileSpan>>(other.fileSpans.entrySet().iterator());
while(thisIterator.hasNext() || otherIterator.hasNext()) {
int compareValue = thisIterator.peek().getKey().compareTo(otherIterator.peek().getKey());
if(compareValue < 0) {
// This before other.
difference += ((GATKBAMFileSpan)thisIterator.next().getValue()).size();
}
else if(compareValue > 0) {
// Other before this.
difference += ((GATKBAMFileSpan)otherIterator.next().getValue()).size();
}
else {
// equality; difference the values.
GATKBAMFileSpan thisRegion = (GATKBAMFileSpan)thisIterator.next().getValue();
GATKBAMFileSpan otherRegion = (GATKBAMFileSpan)otherIterator.next().getValue();
difference += Math.abs(thisRegion.minus(otherRegion).size());
}
}
return difference;
}
/**
* Combines two file pointers into one.
* @param parser The genomelocparser to use when manipulating intervals.
* @param other File pointer to combine into this one.
* @return A completely new file pointer that is the combination of the two.
*/
public FilePointer combine(final GenomeLocParser parser, final FilePointer other) {
FilePointer combined = new FilePointer();
List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
intervals.addAll(locations);
intervals.addAll(other.locations);
for(GenomeLoc interval: IntervalUtils.sortAndMergeIntervals(parser,intervals,IntervalMergingRule.ALL))
combined.addLocation(interval);
PeekableIterator<Map.Entry<SAMReaderID,SAMFileSpan>> thisIterator = new PeekableIterator<Map.Entry<SAMReaderID,SAMFileSpan>>(this.fileSpans.entrySet().iterator());
PeekableIterator<Map.Entry<SAMReaderID,SAMFileSpan>> otherIterator = new PeekableIterator<Map.Entry<SAMReaderID,SAMFileSpan>>(other.fileSpans.entrySet().iterator());
while(thisIterator.hasNext() || otherIterator.hasNext()) {
int compareValue = thisIterator.peek().getKey().compareTo(otherIterator.peek().getKey());
if(compareValue < 0) {
// This before other.
Map.Entry<SAMReaderID,SAMFileSpan> entry = thisIterator.next();
combined.addFileSpans(entry.getKey(),entry.getValue());
}
else if(compareValue > 0) {
// Other before this.
Map.Entry<SAMReaderID,SAMFileSpan> entry = otherIterator.next();
combined.addFileSpans(entry.getKey(),entry.getValue());
}
else {
// equality; union the values.
SAMReaderID reader = thisIterator.peek().getKey();
GATKBAMFileSpan thisRegion = (GATKBAMFileSpan)thisIterator.next().getValue();
GATKBAMFileSpan otherRegion = (GATKBAMFileSpan)otherIterator.next().getValue();
combined.addFileSpans(reader,thisRegion.union(otherRegion));
}
}
return combined;
}
}

View File

@ -185,7 +185,7 @@ public class IntervalSharder {
filePointers.add(lastFilePointer);
lastBAMOverlap = bamOverlapIterator.next();
lastFilePointer = new FilePointer(contig,lastBAMOverlap);
lastFilePointer = new FilePointer(lastBAMOverlap);
binStart = lastFilePointer.overlap.start;
binStop = lastFilePointer.overlap.stop;
}

View File

@ -25,198 +25,44 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.GATKBAMFileSpan;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
/**
* Assign intervals to the most appropriate blocks, keeping as little as possible in memory at once.
* Handles the process of aggregating BAM intervals into individual shards.
*/
public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
private static Logger logger = Logger.getLogger(IntervalSharder.class);
/**
* The iterator actually laying out the data for BAM scheduling.
*/
private final PeekableIterator<FilePointer> wrappedIterator;
private final SAMDataSource dataSource;
private final Map<SAMReaderID,GATKBAMIndex> indices = new HashMap<SAMReaderID,GATKBAMIndex>();
private FilePointer nextFilePointer = null;
private final GenomeLocSortedSet loci;
private final PeekableIterator<GenomeLoc> locusIterator;
private GenomeLoc currentLocus;
/**
* The parser, for interval manipulation.
*/
private final GenomeLocParser parser;
public LowMemoryIntervalSharder(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
this.dataSource = dataSource;
for(SAMReaderID reader: dataSource.getReaderIDs())
indices.put(reader,(GATKBAMIndex)dataSource.getIndex(reader));
this.loci = loci;
locusIterator = new PeekableIterator<GenomeLoc>(loci.iterator());
if(locusIterator.hasNext())
currentLocus = locusIterator.next();
advance();
wrappedIterator = new PeekableIterator<FilePointer>(new BAMScheduler(dataSource,loci));
parser = loci.getGenomeLocParser();
}
public boolean hasNext() {
return nextFilePointer != null;
return wrappedIterator.hasNext();
}
/**
* Accumulate shards where there's no additional cost to processing the next shard in the sequence.
* @return The next file pointer to process.
*/
public FilePointer next() {
if(!hasNext())
throw new NoSuchElementException("No next element available in interval sharder");
FilePointer currentFilePointer = nextFilePointer;
advance();
return currentFilePointer;
}
public void remove() {
throw new UnsupportedOperationException("Unable to remove FilePointers from an IntervalSharder");
}
private void advance() {
if(loci.isEmpty())
return;
nextFilePointer = null;
while(nextFilePointer == null && currentLocus != null) {
// special case handling of the unmapped shard.
if(currentLocus == GenomeLoc.UNMAPPED) {
nextFilePointer = new FilePointer(GenomeLoc.UNMAPPED);
for(SAMReaderID id: dataSource.getReaderIDs())
nextFilePointer.addFileSpans(id,null);
currentLocus = null;
continue;
}
nextFilePointer = new FilePointer(currentLocus.getContig());
int coveredRegionStart = 1;
int coveredRegionStop = Integer.MAX_VALUE;
GenomeLoc coveredRegion = null;
BAMScheduleEntry scheduleEntry = getNextOverlappingBAMScheduleEntry(indices,currentLocus);
// No overlapping data at all.
if(scheduleEntry != null) {
coveredRegionStart = Math.max(coveredRegionStart,scheduleEntry.start);
coveredRegionStop = Math.min(coveredRegionStop,scheduleEntry.stop);
coveredRegion = loci.getGenomeLocParser().createGenomeLoc(currentLocus.getContig(),coveredRegionStart,coveredRegionStop);
nextFilePointer.addFileSpans(scheduleEntry.fileSpans);
}
else {
// Always create a file span, whether there was covered data or not. If there was no covered data, then the binTree is empty.
//System.out.printf("Shard: index file = %s; reference sequence = %d; ",index.getIndexFile(),currentLocus.getContigIndex());
for(SAMReaderID reader: indices.keySet())
nextFilePointer.addFileSpans(reader,new GATKBAMFileSpan());
}
// Early exit if no bins were found.
if(coveredRegion == null) {
// for debugging only: maximum split is 16384.
if(currentLocus.size() > 16384) {
GenomeLoc[] splitContigs = currentLocus.split(currentLocus.getStart()+16384);
nextFilePointer.addLocation(splitContigs[0]);
currentLocus = splitContigs[1];
}
else {
nextFilePointer.addLocation(currentLocus);
currentLocus = locusIterator.hasNext() ? locusIterator.next() : null;
}
continue;
}
// Early exit if only part of the first interval was found.
if(currentLocus.startsBefore(coveredRegion)) {
// for debugging only: maximum split is 16384.
int splitPoint = Math.min(coveredRegion.getStart()-currentLocus.getStart(),16384)+currentLocus.getStart();
GenomeLoc[] splitContigs = currentLocus.split(splitPoint);
nextFilePointer.addLocation(splitContigs[0]);
currentLocus = splitContigs[1];
continue;
}
// Define the initial range of the file pointer, aka the region where the locus currently being processed intersects the BAM list.
GenomeLoc initialLocation = currentLocus.intersect(coveredRegion);
nextFilePointer.addLocation(initialLocation);
// See whether the BAM regions discovered overlap the next set of intervals in the interval list. If so, include every overlapping interval.
if(!nextFilePointer.locations.isEmpty()) {
while(locusIterator.hasNext() && locusIterator.peek().overlapsP(coveredRegion)) {
currentLocus = locusIterator.next();
nextFilePointer.addLocation(currentLocus.intersect(coveredRegion));
}
// Chop off the uncovered portion of the locus. Since we know that the covered region overlaps the current locus,
// we can simplify the interval creation process to the end of the covered region to the stop of the given interval.
if(coveredRegionStop < currentLocus.getStop())
currentLocus = loci.getGenomeLocParser().createGenomeLoc(currentLocus.getContig(),coveredRegionStop+1,currentLocus.getStop());
else if(locusIterator.hasNext())
currentLocus = locusIterator.next();
else
currentLocus = null;
}
}
}
/**
* The last reference sequence processed by this iterator.
*/
private Integer lastReferenceSequenceLoaded = null;
/**
* The stateful iterator used to progress through the genoem.
*/
private PeekableIterator<BAMScheduleEntry> bamScheduleIterator = null;
/**
* Get the next overlapping tree of bins associated with the given BAM file.
* @param indices BAM index representation.
* @param currentLocus The actual locus for which to check overlap.
* @return The next schedule entry overlapping with the given list of loci.
*/
private BAMScheduleEntry getNextOverlappingBAMScheduleEntry(final Map<SAMReaderID,GATKBAMIndex> indices, final GenomeLoc currentLocus) {
// Stale reference sequence or first invocation. (Re)create the binTreeIterator.
if(lastReferenceSequenceLoaded == null || lastReferenceSequenceLoaded != currentLocus.getContigIndex()) {
if(bamScheduleIterator != null)
bamScheduleIterator.close();
lastReferenceSequenceLoaded = currentLocus.getContigIndex();
// Naive algorithm: find all elements in current contig for proper schedule creation.
List<GenomeLoc> lociInContig = new LinkedList<GenomeLoc>();
for(GenomeLoc locus: loci) {
if(locus.getContigIndex() == lastReferenceSequenceLoaded)
lociInContig.add(locus);
}
bamScheduleIterator = new PeekableIterator<BAMScheduleEntry>(new BAMSchedule(indices,lociInContig));
}
if(!bamScheduleIterator.hasNext())
return null;
// Peek the iterator along until finding the first binTree at or following the current locus.
BAMScheduleEntry bamScheduleEntry = bamScheduleIterator.peek();
while(bamScheduleEntry != null && bamScheduleEntry.isBefore(currentLocus)) {
bamScheduleIterator.next();
bamScheduleEntry = bamScheduleIterator.hasNext() ? bamScheduleIterator.peek() : null;
}
return (bamScheduleEntry != null && bamScheduleEntry.overlaps(currentLocus)) ? bamScheduleEntry : null;
FilePointer current = wrappedIterator.next();
while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0)
current = current.combine(parser,wrappedIterator.next());
return current;
}
public void remove() { throw new UnsupportedOperationException("Unable to remove from an interval sharder."); }
}

View File

@ -51,7 +51,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
public static final String SIMPLIFY_BAM_FULLNAME = "simplifyBAM";
public static final String SIMPLIFY_BAM_SHORTNAME = SIMPLIFY_BAM_FULLNAME;
public static final String CREATE_INDEX_FULLNAME = "disable_bam_indexing";
public static final String DISABLE_INDEXING_FULLNAME = "disable_bam_indexing";
/**
* The engine into which output stubs should be fed.
@ -82,7 +82,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
return Arrays.asList( createBAMArgumentDefinition(source),
createBAMCompressionArgumentDefinition(source),
createWriteIndexArgumentDefinition(source),
disableWriteIndexArgumentDefinition(source),
createSimplifyBAMArgumentDefinition(source));
}
@ -117,7 +117,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
if( compressionLevel != null )
stub.setCompressionLevel(compressionLevel);
stub.setIndexOnTheFly(!argumentIsPresent(createWriteIndexArgumentDefinition(source),matches));
stub.setIndexOnTheFly(!argumentIsPresent(disableWriteIndexArgumentDefinition(source),matches));
stub.setSimplifyBAM(argumentIsPresent(createSimplifyBAMArgumentDefinition(source),matches));
// WARNING: Side effects required by engine!
@ -171,10 +171,10 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
null );
}
private ArgumentDefinition createWriteIndexArgumentDefinition(ArgumentSource source) {
private ArgumentDefinition disableWriteIndexArgumentDefinition(ArgumentSource source) {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
boolean.class,
CREATE_INDEX_FULLNAME,
DISABLE_INDEXING_FULLNAME,
null,
"Turn off on-the-fly creation of indices for output BAM files.",
false,