Added a bunch of fixes: MSRI wasn't working, sharding had broken edge cases, and SAMBAM DS needed to close the file handles.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@341 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8efedacabf
commit
12752cf893
|
|
@ -115,19 +115,19 @@ public abstract class ShardStrategy implements Iterator<Shard>, Iterable<Shard>
|
|||
public Shard next() {
|
||||
// let's get some background info on the problem
|
||||
long length = dic.getSequence(seqLoc).getSequenceLength();
|
||||
long proposedSize = nextShardSize() - 1;
|
||||
long proposedSize = nextShardSize();
|
||||
long nextStart = mLoc.getStop() + 1;
|
||||
// can we fit it into the current seq size?
|
||||
if (nextStart + proposedSize < length) {
|
||||
if (nextStart + proposedSize - 1 < length) {
|
||||
lastGenomeLocSize = proposedSize;
|
||||
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize);
|
||||
return Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize));
|
||||
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize-1);
|
||||
return Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + proposedSize-1));
|
||||
}
|
||||
// else we can't make it in the current location, we have to stitch one together
|
||||
else {
|
||||
long overflow = nextStart + proposedSize - length;
|
||||
long overflow = nextStart + proposedSize -1 - length;
|
||||
logger.debug("Overflow = " + overflow + " length: " + length);
|
||||
lastGenomeLocSize = lastGenomeLocSize - overflow;
|
||||
lastGenomeLocSize = proposedSize - overflow;
|
||||
// move to the next contig
|
||||
// the next sequence should start at the beginning of the next contig
|
||||
Shard ret = Shard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceName(), nextStart, nextStart + lastGenomeLocSize));
|
||||
|
|
|
|||
|
|
@ -37,35 +37,31 @@ public class SAMBAMDataSource implements SimpleDataSource {
|
|||
// do we care that the SAM files respect the sort order?
|
||||
private boolean matchedSortOrders = true;
|
||||
|
||||
// our merged sam iterator for splitting up the files
|
||||
MergingSamRecordIterator2 mergeIterator;
|
||||
|
||||
// are we set to locus mode or read mode for dividing
|
||||
private boolean locusMode = true;
|
||||
|
||||
// How strict should we be with SAM/BAM parsing?
|
||||
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.STRICT;
|
||||
|
||||
// our list of readers
|
||||
private final List<File> samFileList = new ArrayList<File>();
|
||||
|
||||
/**
|
||||
* constructor, given a list of sam files
|
||||
*
|
||||
* @param samFiles the list of sam files
|
||||
*/
|
||||
public SAMBAMDataSource(List<String> samFiles) throws SimpleDataSourceLoadException {
|
||||
List<SAMFileReader> readers = new ArrayList<SAMFileReader>();
|
||||
for (String fileName : samFiles) {
|
||||
File smFile = new File(fileName);
|
||||
if (!smFile.canRead()) {
|
||||
throw new SimpleDataSourceLoadException("SAMBAMDataSource: Unable to load file: " + fileName);
|
||||
}
|
||||
SAMFileReader reader = initializeSAMFile(smFile);
|
||||
if (reader != null) {
|
||||
readers.add(reader);
|
||||
}
|
||||
samFileList.add(smFile);
|
||||
|
||||
}
|
||||
|
||||
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers, SORT_ORDER);
|
||||
this.mergeIterator = new MergingSamRecordIterator2(headerMerger);
|
||||
//SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(samFileList, SORT_ORDER);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -73,6 +69,7 @@ public class SAMBAMDataSource implements SimpleDataSource {
|
|||
if (samFile.toString().endsWith(".list")) {
|
||||
return null;
|
||||
} else {
|
||||
System.err.println("initializeSAMFile");
|
||||
SAMFileReader samReader = new SAMFileReader(samFile, true);
|
||||
samReader.setValidationStringency(strictness);
|
||||
|
||||
|
|
@ -96,19 +93,43 @@ public class SAMBAMDataSource implements SimpleDataSource {
|
|||
|
||||
/**
|
||||
* <p>
|
||||
* getQueryRegionIterator
|
||||
* seek
|
||||
* </p>
|
||||
*
|
||||
* @param location the genome location to extract data for
|
||||
* @return an iterator for that region
|
||||
*/
|
||||
public MergingSamRecordIterator2 seek(GenomeLoc location) {
|
||||
MergingSamRecordIterator2 iter = null; // new MergingSamRecordIterator2(this.mergeIterator.getMergedHeader().);
|
||||
public MergingSamRecordIterator2 seek(GenomeLoc location) throws SimpleDataSourceLoadException {
|
||||
|
||||
// right now this is pretty damn heavy, it copies the file list into a reader list every time
|
||||
List<SAMFileReader> lst = new ArrayList<SAMFileReader>();
|
||||
for (File f : this.samFileList) {
|
||||
SAMFileReader reader = initializeSAMFile(f);
|
||||
if (reader == null) {
|
||||
throw new SimpleDataSourceLoadException("SAMBAMDataSource: Unable to load file: " + f);
|
||||
}
|
||||
lst.add(reader);
|
||||
}
|
||||
|
||||
// now merge the headers
|
||||
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(lst, SORT_ORDER);
|
||||
|
||||
// make a merging iterator for this record
|
||||
MergingSamRecordIterator2 iter = new MergingSamRecordIterator2(headerMerger);
|
||||
|
||||
|
||||
System.err.println("About to query");
|
||||
// we do different things for locus and read modes
|
||||
if (locusMode) {
|
||||
iter.query(location.getContig(), (int) location.getStart(), (int) location.getStop(), true);
|
||||
} else {
|
||||
iter.queryContained(location.getContig(), (int) location.getStart(), (int) location.getStop());
|
||||
}
|
||||
return iter; //To change body of implemented methods use File | Settings | File Templates.
|
||||
|
||||
// return the iterator
|
||||
return iter;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,6 +32,6 @@ public interface SimpleDataSource extends Serializable {
|
|||
* @param location the genome location to extract data for
|
||||
* @return an iterator of the appropriate type, that is limited by the region
|
||||
*/
|
||||
public Iterator seek(GenomeLoc location);
|
||||
public Iterator seek(GenomeLoc location) throws SimpleDataSourceLoadException;
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,35 +10,30 @@
|
|||
*/
|
||||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import edu.mit.broad.picard.sam.SamFileHeaderMerger;
|
||||
import edu.mit.broad.picard.sam.ReservedTagConstants;
|
||||
import edu.mit.broad.picard.PicardException;
|
||||
import edu.mit.broad.picard.sam.ReservedTagConstants;
|
||||
import edu.mit.broad.picard.sam.SamFileHeaderMerger;
|
||||
import edu.mit.broad.picard.util.PeekableIterator;
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
|
||||
/*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever.
|
||||
* Neither the Broad Institute nor MIT can be responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
import java.util.*;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
import java.util.PriorityQueue;
|
||||
|
||||
/**
|
||||
* Provides an iterator interface for merging multiple underlying iterators into a single
|
||||
* iterable stream. The underlying iterators/files must all have the same sort order unless
|
||||
* the requested output format is unsorted, in which case any combination is valid.
|
||||
*/
|
||||
public class MergingSamRecordIterator2 implements Iterator<SAMRecord> {
|
||||
protected PriorityQueue<ComparableSamRecordIterator> pq;
|
||||
public class MergingSamRecordIterator2 implements CloseableIterator<SAMRecord>, Iterable<SAMRecord> {
|
||||
protected PriorityQueue<ComparableSamRecordIterator> pq = null;
|
||||
protected final SamFileHeaderMerger samHeaderMerger;
|
||||
protected final SAMFileHeader.SortOrder sortOrder;
|
||||
|
||||
protected boolean initialized = false;
|
||||
|
||||
/**
|
||||
* Constructs a new merging iterator with the same set of readers and sort order as
|
||||
* provided by the header merger parameter.
|
||||
|
|
@ -46,8 +41,18 @@ public class MergingSamRecordIterator2 implements Iterator<SAMRecord> {
|
|||
public MergingSamRecordIterator2(final SamFileHeaderMerger headerMerger) {
|
||||
this.samHeaderMerger = headerMerger;
|
||||
this.sortOrder = headerMerger.getMergedHeader().getSortOrder();
|
||||
initializePQ();
|
||||
this.pq = new PriorityQueue<ComparableSamRecordIterator>(samHeaderMerger.getReaders().size());
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* this class MUST only be initialized once; a second initialization attempt throws an exception
|
||||
*/
|
||||
private void lazyInitialization() {
|
||||
if (initialized) {
|
||||
throw new UnsupportedOperationException("You cannot double initialize a MergingSamRecordIterator2");
|
||||
}
|
||||
initialized = true;
|
||||
final SAMRecordComparator comparator = getComparator();
|
||||
for (final SAMFileReader reader : samHeaderMerger.getReaders()) {
|
||||
if (this.sortOrder != SAMFileHeader.SortOrder.unsorted && reader.getFileHeader().getSortOrder() != this.sortOrder) {
|
||||
|
|
@ -59,17 +64,15 @@ public class MergingSamRecordIterator2 implements Iterator<SAMRecord> {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
protected void initializePQ() {
|
||||
this.pq = new PriorityQueue<ComparableSamRecordIterator>(samHeaderMerger.getReaders().size());
|
||||
}
|
||||
|
||||
public boolean supportsSeeking() {
|
||||
public boolean supportsSeeking() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public void queryOverlapping(final String contig, final int start, final int stop) {
|
||||
initializePQ(); // reinitialize the system
|
||||
if (initialized) {
|
||||
throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2");
|
||||
}
|
||||
initialized = true;
|
||||
final SAMRecordComparator comparator = getComparator();
|
||||
|
||||
for (final SAMFileReader reader : samHeaderMerger.getReaders()) {
|
||||
|
|
@ -80,9 +83,13 @@ public class MergingSamRecordIterator2 implements Iterator<SAMRecord> {
|
|||
}
|
||||
|
||||
public void query(final String contig, final int start, final int stop, final boolean contained) {
|
||||
initializePQ(); // reinitialize the system
|
||||
if (initialized) {
|
||||
throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2");
|
||||
}
|
||||
initialized = true;
|
||||
final SAMRecordComparator comparator = getComparator();
|
||||
for (final SAMFileReader reader : samHeaderMerger.getReaders()) {
|
||||
//reader.close();
|
||||
Iterator<SAMRecord> recordIter = reader.query(contig, start, stop, contained);
|
||||
final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(reader, recordIter, comparator);
|
||||
addIfNotEmpty(iterator);
|
||||
|
|
@ -90,7 +97,10 @@ public class MergingSamRecordIterator2 implements Iterator<SAMRecord> {
|
|||
}
|
||||
|
||||
public void queryContained(final String contig, final int start, final int stop) {
|
||||
initializePQ(); // reinitialize the system
|
||||
if (initialized) {
|
||||
throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2");
|
||||
}
|
||||
initialized = true;
|
||||
final SAMRecordComparator comparator = getComparator();
|
||||
for (final SAMFileReader reader : samHeaderMerger.getReaders()) {
|
||||
Iterator<SAMRecord> recordIter = reader.queryContained(contig, start, stop);
|
||||
|
|
@ -106,6 +116,9 @@ public class MergingSamRecordIterator2 implements Iterator<SAMRecord> {
|
|||
|
||||
/** Returns the next record from the top most iterator during merging. */
|
||||
public synchronized SAMRecord next() {
|
||||
if (!initialized) {
|
||||
lazyInitialization();
|
||||
}
|
||||
final ComparableSamRecordIterator iterator = this.pq.poll();
|
||||
final SAMRecord record = iterator.next();
|
||||
addIfNotEmpty(iterator);
|
||||
|
|
@ -186,6 +199,26 @@ public class MergingSamRecordIterator2 implements Iterator<SAMRecord> {
|
|||
public SAMFileHeader getMergedHeader() {
|
||||
return this.samHeaderMerger.getMergedHeader();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* closes all the file handles for the readers. DO THIS or you will run out of handles
|
||||
* with sharding.
|
||||
*/
|
||||
public void close() {
|
||||
for (SAMFileReader reader : samHeaderMerger.getReaders()) {
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* allows us to be used in the new style for loops
|
||||
* @return this iterator itself
|
||||
*/
|
||||
public Iterator<SAMRecord> iterator() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
// Should replace picard class with the same name
|
||||
|
|
|
|||
Loading…
Reference in New Issue