Got back most of the performance lost when I fixed the dropped reads problem.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2835 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-02-12 19:59:56 +00:00
parent 04a2784bf7
commit 232d884578
6 changed files with 53 additions and 30 deletions

View File

@ -547,7 +547,7 @@ class BAMFileReader2
mRegionEnd = (end <= 0) ? Integer.MAX_VALUE : end;
}
mQueryType = queryType;
advance();
nextRead = advance();
}
/**
@ -565,7 +565,7 @@ class BAMFileReader2
if(!hasNext())
throw new NoSuchElementException("BAMQueryFilteringIterator: no next element available");
final SAMRecord currentRead = nextRead;
advance();
nextRead = advance();
return currentRead;
}

View File

@ -6,6 +6,8 @@ import net.sf.samtools.SAMFileReader2;
import java.util.List;
import java.util.Map;
import org.broadinstitute.sting.utils.GenomeLoc;
/**
* A common interface for shards that natively understand the BAM format.
*
@ -17,5 +19,13 @@ public interface BAMFormatAwareShard extends Shard {
* Get the list of chunks delimiting this shard.
* @return a list of chunks that contain data for this shard.
*/
public Map<SAMFileReader2,List<Chunk>> getChunks();
public Map<SAMFileReader2,List<Chunk>> getChunks();
/**
* Get the bounds of the current shard. Current bounds
* will be the unfiltered extents of the current shard, from
* the start of the first interval to the end of the last interval.
* @return The bounds of the shard.
*/
public GenomeLoc getBounds();
}

View File

@ -7,6 +7,8 @@ import java.util.List;
import java.util.Map;
import java.util.Collections;
import org.broadinstitute.sting.utils.GenomeLoc;
/**
* Expresses a shard of read data in block format.
*
@ -37,6 +39,16 @@ public class BlockDelimitedReadShard extends ReadShard implements BAMFormatAware
return Collections.singletonMap(reader,chunks);
}
/**
 * Reports the bounds of this shard.
 * This implementation does not compute any extents; it unconditionally
 * returns null, so callers must not dereference the result blindly.
 * @return always null.
 */
public GenomeLoc getBounds() {
    // No bounds are derived for this shard type.
    return null;
}
/**
* String representation of this shard.
* @return A string representation of the boundaries of this shard.

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser;
import net.sf.samtools.Chunk;
import net.sf.samtools.SAMFileReader2;
@ -71,6 +72,27 @@ public class IndexDelimitedLocusShard extends LocusShard implements BAMFormatAwa
return chunks;
}
/**
 * Get the bounds of the current shard.  The bounds are the unfiltered
 * extents of the shard: the smallest start and the largest stop found
 * among its loci, reported on the contig of the first locus.
 * @return The bounds of the shard, or null if this shard has no loci
 *         (the list is null or empty).
 */
public GenomeLoc getBounds() {
    // Without any loci there are no extents to report.  An empty list would
    // otherwise fall through the loop and ask for a location with a null
    // contig and start (Long.MAX_VALUE) greater than stop (0).
    if(loci == null || loci.isEmpty())
        return null;

    String contig = null;
    long start = Long.MAX_VALUE, stop = 0;
    for(GenomeLoc locus: loci) {
        // Only the first locus supplies the contig.
        // NOTE(review): presumably all loci in a shard share one contig -- confirm.
        if(contig == null) contig = locus.getContig();
        start = Math.min(locus.getStart(),start);
        stop = Math.max(locus.getStop(),stop);
    }
    return GenomeLocParser.createGenomeLoc(contig,start,stop);
}
/**
* returns the type of shard.
*/

View File

@ -3,40 +3,19 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import java.util.Collections;
import java.util.List;
/**
*
* User: aaron
* Date: Apr 7, 2009
* Time: 1:19:49 PM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
* @author aaron
* @version 1.0
* @date Apr 7, 2009
* <p/>
* Class Shard
* <p/>
* This is the base class for locus shards. Right now it does little more than
* wrap GenomeLoc (actually nothing more), but it's good to have the class
* in place so it's easier to change guts later.
* @author aaron
* @version 1.0
* @date Apr 7, 2009
*/
public class LocusShard implements Shard {
// currently our location
final List<GenomeLoc> loci;
protected final List<GenomeLoc> loci;
public LocusShard(List<GenomeLoc> loci) {
this.loci = loci;

View File

@ -154,8 +154,8 @@ public class BlockDrivenSAMDataSource extends SAMDataSource {
Map<SAMFileReader,CloseableIterator<SAMRecord>> readerToIteratorMap = new HashMap<SAMFileReader,CloseableIterator<SAMRecord>>();
for(Map.Entry<SAMFileReader2,List<Chunk>> chunksByReader: bamAwareShard.getChunks().entrySet()) {
SAMFileReader2 reader = chunksByReader.getKey();
List<Chunk> chunks = chunksByReader.getValue();
readerToIteratorMap.put(reader,reader.iterator(chunks));
GenomeLoc bounds = bamAwareShard.getBounds();
readerToIteratorMap.put(reader,reader.queryOverlapping(bounds.getContig(),(int)bounds.getStart(),(int)bounds.getStop()));
}
// Set up merging and filtering to dynamically merge together multiple BAMs and filter out records not in the shard set.