diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/BAMFileStat.java b/java/src/org/broadinstitute/sting/gatk/datasources/BAMFileStat.java index c8a046986..ddec0b314 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/BAMFileStat.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/BAMFileStat.java @@ -95,9 +95,9 @@ public class BAMFileStat extends CommandLineProgram { inspector.inspect(System.out,null,null); } - private class BAMFileIndexContentInspector extends CachingBAMFileIndex { + private class BAMFileIndexContentInspector /*extends CachingBAMFileIndex*/ { public BAMFileIndexContentInspector(File bamFileIndex) { - super(bamFileIndex); +// super(bamFileIndex); } public void inspect(PrintStream outputStream, Integer startPosition, Integer stopPosition) { diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalSharder.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalSharder.java index 93a417571..046250894 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalSharder.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalSharder.java @@ -29,6 +29,7 @@ import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.BlockDrivenSAMDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; +import org.apache.log4j.Logger; import java.util.*; @@ -42,6 +43,8 @@ import net.sf.picard.util.PeekableIterator; * @version 0.1 */ public class IntervalSharder { + private static Logger logger = Logger.getLogger(IntervalSharder.class); + public static Iterator shardIntervals(final BlockDrivenSAMDataSource dataSource, final List loci) { return new FilePointerIterator(dataSource,loci); } @@ -101,13 +104,13 @@ public class IntervalSharder { FilePointer lastFilePointer = null; BAMOverlap lastBAMOverlap = null; - Map readerToIndexMap = new HashMap(); + Map readerToIndexMap = new HashMap(); BinMergingIterator binMerger = new BinMergingIterator(); for(SAMReaderID id: dataSource.getReaderIDs()) { final SAMSequenceRecord referenceSequence = dataSource.getHeader(id).getSequence(contig); if(referenceSequence == null) continue; - final CachingBAMFileIndex index = dataSource.getIndex(id); + final BrowseableBAMIndex index = dataSource.getIndex(id); binMerger.addReader(id, index, referenceSequence.getSequenceIndex(), @@ -115,6 +118,7 @@ public class IntervalSharder { // Cache the reader for later data lookup. readerToIndexMap.put(id,index); } + PeekableIterator binIterator = new PeekableIterator(binMerger); for(GenomeLoc location: loci) { @@ -201,10 +205,9 @@ public class IntervalSharder { // Lookup the locations for every file pointer in the index. for(SAMReaderID id: readerToIndexMap.keySet()) { - CachingBAMFileIndex index = readerToIndexMap.get(id); + BrowseableBAMIndex index = readerToIndexMap.get(id); for(FilePointer filePointer: filePointers) - filePointer.addFileSpans(id,index.getChunksOverlapping(filePointer.overlap.getBin(id))); - index.close(); + filePointer.addFileSpans(id,index.getSpanOverlapping(filePointer.overlap.getBin(id))); } return filePointers; @@ -214,7 +217,7 @@ public class IntervalSharder { private PriorityQueue binQueue = new PriorityQueue(); private Queue pendingOverlaps = new LinkedList(); - public void addReader(final SAMReaderID id, final CachingBAMFileIndex index, final int referenceSequence, Iterator bins) { + public void addReader(final SAMReaderID id, final BrowseableBAMIndex index, final int referenceSequence, Iterator bins) { binQueue.add(new BinQueueState(id,index,referenceSequence,new LowestLevelBinFilteringIterator(index,bins))); } @@ -292,85 +295,31 @@ public class IntervalSharder { if(binQueue.isEmpty()) throw new NoSuchElementException("No more bins are available"); BinQueueState current = binQueue.peek(); - return new ReaderBin(current.id,current.index,current.referenceSequence,current.bins.peek()); + return new ReaderBin(current.getReaderID(),current.getIndex(),current.getReferenceSequence(),current.peekNextBin()); } private ReaderBin getNextBin() { if(binQueue.isEmpty()) throw new NoSuchElementException("No more bins are available"); BinQueueState current = binQueue.remove(); - ReaderBin readerBin = new ReaderBin(current.id,current.index,current.referenceSequence,current.bins.next()); - if(current.bins.hasNext()) + ReaderBin readerBin = new ReaderBin(current.getReaderID(),current.getIndex(),current.getReferenceSequence(),current.nextBin()); + if(current.hasNextBin()) binQueue.add(current); return readerBin; } - private class ReaderBin { - public final SAMReaderID id; - public final CachingBAMFileIndex index; - public final int referenceSequence; - public final Bin bin; - - public ReaderBin(final SAMReaderID id, final CachingBAMFileIndex index, final int referenceSequence, final Bin bin) { - this.id = id; - this.index = index; - this.referenceSequence = referenceSequence; - this.bin = bin; - } - - public int getStart() { - return index.getFirstLocusInBin(bin); - } - - public int getStop() { - return index.getLastLocusInBin(bin); - } - } - - private class BinQueueState implements Comparable { - public final SAMReaderID id; - public final CachingBAMFileIndex index; - public final int referenceSequence; - public final PeekableIterator bins; - - public BinQueueState(final SAMReaderID id, final CachingBAMFileIndex index, final int referenceSequence, final Iterator bins) { - this.id = id; - this.index = index; - this.referenceSequence = referenceSequence; - this.bins = new PeekableIterator(bins); - } - - public int compareTo(BinQueueState other) { - if(!this.bins.hasNext() && !other.bins.hasNext()) return 0; - if(!this.bins.hasNext()) return -1; - if(!this.bins.hasNext()) return 1; - - int thisStart = this.index.getFirstLocusInBin(this.bins.peek()); - int otherStart = other.index.getFirstLocusInBin(other.bins.peek()); - - // Straight integer subtraction works here because lhsStart, rhsStart always positive. - if(thisStart != otherStart) - return thisStart - otherStart; - - int thisStop = this.index.getLastLocusInBin(this.bins.peek()); - int otherStop = other.index.getLastLocusInBin(other.bins.peek()); - - // Straight integer subtraction works here because lhsStop, rhsStop always positive. - return thisStop - otherStop; - } - } } /** * Filters out bins not at the lowest level in the tree. */ private static class LowestLevelBinFilteringIterator implements Iterator { - private CachingBAMFileIndex index; + private BrowseableBAMIndex index; private Iterator wrappedIterator; private Bin nextBin; - public LowestLevelBinFilteringIterator(final CachingBAMFileIndex index, Iterator iterator) { + public LowestLevelBinFilteringIterator(final BrowseableBAMIndex index, Iterator iterator) { this.index = index; this.wrappedIterator = iterator; advance(); @@ -396,7 +345,7 @@ public class IntervalSharder { nextBin = bin; } } - } + } } /** @@ -452,5 +401,101 @@ class BAMOverlap { } } +class ReaderBin { + public final SAMReaderID id; + public final BrowseableBAMIndex index; + public final int referenceSequence; + public final Bin bin; + + public ReaderBin(final SAMReaderID id, final BrowseableBAMIndex index, final int referenceSequence, final Bin bin) { + this.id = id; + this.index = index; + this.referenceSequence = referenceSequence; + this.bin = bin; + } + + public int getStart() { + return index.getFirstLocusInBin(bin); + } + + public int getStop() { + return index.getLastLocusInBin(bin); + } +} + +class BinQueueState implements Comparable { + private final SAMReaderID id; + private final BrowseableBAMIndex index; + private final int referenceSequence; + private final PeekableIterator bins; + + private int firstLocusInCurrentBin; + private int lastLocusInCurrentBin; + + public BinQueueState(final SAMReaderID id, final BrowseableBAMIndex index, final int referenceSequence, final Iterator bins) { + this.id = id; + this.index = index; + this.referenceSequence = referenceSequence; + this.bins = new PeekableIterator(bins); + refreshLocusInBinCache(); + } + + public SAMReaderID getReaderID() { + return id; + } + + public BrowseableBAMIndex getIndex() { + return index; + } + + public int getReferenceSequence() { + return referenceSequence; + } + + public boolean hasNextBin() { + return bins.hasNext(); + } + + public Bin peekNextBin() { + return bins.peek(); + } + + public Bin nextBin() { + Bin nextBin = bins.next(); + refreshLocusInBinCache(); + return nextBin; + } + + public int compareTo(BinQueueState other) { + if(!this.bins.hasNext() && !other.bins.hasNext()) return 0; + if(!this.bins.hasNext()) return -1; + if(!this.bins.hasNext()) return 1; + + // Both BinQueueStates have next bins. Before proceeding, make sure the bin cache is valid. + if(this.firstLocusInCurrentBin <= 0 || this.lastLocusInCurrentBin <= 0 || + other.firstLocusInCurrentBin <= 0 || other.lastLocusInCurrentBin <= 0) { + throw new StingException("Sharding mechanism error - bin->locus cache is invalid."); + } + + // Straight integer subtraction works here because lhsStart, rhsStart always positive. + if(this.firstLocusInCurrentBin != other.firstLocusInCurrentBin) + return this.firstLocusInCurrentBin - other.firstLocusInCurrentBin; + + // Straight integer subtraction works here because lhsStop, rhsStop always positive. + return this.lastLocusInCurrentBin - other.lastLocusInCurrentBin; + } + + private void refreshLocusInBinCache() { + firstLocusInCurrentBin = -1; + lastLocusInCurrentBin = -1; + if(bins.hasNext()) { + Bin bin = bins.peek(); + firstLocusInCurrentBin = index.getFirstLocusInBin(bin); + lastLocusInCurrentBin = index.getLastLocusInBin(bin); + } + } +} + + diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java index 1a712cbc8..fe1c4ff37 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java @@ -129,9 +129,9 @@ public class BlockDrivenSAMDataSource extends SAMDataSource { * @param id Id of the reader. * @return The index. Will preload the index if necessary. */ - public CachingBAMFileIndex getIndex(final SAMReaderID id) { + public BrowseableBAMIndex getIndex(final SAMReaderID id) { SAMReaders readers = resourcePool.getReadersWithoutLocking(); - return readers.getReader(id).getIndex(CachingBAMFileIndex.class); + return readers.getReader(id).getBrowseableIndex(); } /** @@ -181,7 +181,7 @@ public class BlockDrivenSAMDataSource extends SAMDataSource { * @return ID of the reader. */ public SAMReaderID getReaderID(SAMRecord read) { - return resourcePool.getReaderID(read.getReader()); + return resourcePool.getReaderID(read.getFileSource().getReader()); } /** @@ -191,7 +191,7 @@ public class BlockDrivenSAMDataSource extends SAMDataSource { * @param read The read to add to the shard. */ private void addReadToBufferingShard(BAMFormatAwareShard shard,SAMReaderID id,SAMRecord read) { - SAMFileSpan endChunk = read.getFilePointer().getContentsFollowing(); + SAMFileSpan endChunk = read.getFileSource().getFilePointer().getContentsFollowing(); shard.addRead(read); readerPositions.put(id,endChunk); } @@ -204,7 +204,7 @@ public class BlockDrivenSAMDataSource extends SAMDataSource { */ private SAMReaderID getReaderID(SAMReaders readers, SAMRecord read) { for(SAMReaderID id: getReaderIDs()) { - if(readers.getReader(id) == read.getReader()) + if(readers.getReader(id) == read.getFileSource().getReader()) return id; } throw new StingException("Unable to find id for reader associated with read " + read.getReadName()); @@ -405,7 +405,9 @@ public class BlockDrivenSAMDataSource extends SAMDataSource { */ public SAMReaders(Reads sourceInfo) { for(File readsFile: sourceInfo.getReadsFiles()) { - SAMFileReader reader = new SAMFileReader(readsFile,CachingBAMFileIndex.class,true); + SAMFileReader reader = new SAMFileReader(readsFile,true); + reader.enableFileSource(true); + reader.enableIndexCaching(true); reader.setValidationStringency(sourceInfo.getValidationStringency()); // If no read group is present, hallucinate one. diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IndexDrivenSAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IndexDrivenSAMDataSource.java index d35e9fa5e..295b9d849 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IndexDrivenSAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IndexDrivenSAMDataSource.java @@ -126,8 +126,8 @@ public class IndexDrivenSAMDataSource extends SAMDataSource { * @return ID of the reader. */ public SAMReaderID getReaderID(SAMRecord read) { - if(resourcePool.readerToIDMap.containsKey(read.getReader())) - return resourcePool.readerToIDMap.get(read.getReader()); + if(resourcePool.readerToIDMap.containsKey(read.getFileSource().getReader())) + return resourcePool.readerToIDMap.get(read.getFileSource().getReader()); throw new StingException("Unable to find reader id for record."); } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java index 36d89cda5..18c4dc3f7 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java @@ -79,8 +79,8 @@ public class ReadFormattingIterator implements StingSAMIterator { // if we don't have a read group, set one. // TODO: Straw poll to see whether this is really required. - if (read.getAttribute(SAMTag.RG.toString()) == null && read.getReader() != null) { - List readGroups = read.getReader().getFileHeader().getReadGroups(); + if (read.getAttribute(SAMTag.RG.toString()) == null && read.getFileSource() != null && read.getFileSource().getReader() != null) { + List readGroups = read.getFileSource().getReader().getFileHeader().getReadGroups(); if (readGroups.size() == 1) { read.setAttribute(SAMTag.RG.toString(), readGroups.get(0).getReadGroupId()); read.setAttribute(SAMTag.SM.toString(), readGroups.get(0).getReadGroupId()); diff --git a/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index f4db442ae..69a237bd1 100755 --- a/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -315,8 +315,6 @@ public class GATKSAMRecord extends SAMRecord { public void setValidationStringency(net.sf.samtools.SAMFileReader.ValidationStringency validationStringency) { mRecord.setValidationStringency(validationStringency); } - public SAMFileReader getReader() { return mRecord.getReader(); } - public SAMFileHeader getHeader() { return mRecord.getHeader(); } public void setHeader(SAMFileHeader samFileHeader) { mRecord.setHeader(samFileHeader); } @@ -341,5 +339,5 @@ public class GATKSAMRecord extends SAMRecord { public String toString() { return mRecord.toString(); } - public SAMFileSpan getFilePointer() { return mRecord.getFilePointer(); } + public SAMFileSource getFileSource() { return mRecord.getFileSource(); } } diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1348-sharding.jar b/settings/repository/edu.mit.broad/picard-private-parts-1377-sharding.jar similarity index 72% rename from settings/repository/edu.mit.broad/picard-private-parts-1348-sharding.jar rename to settings/repository/edu.mit.broad/picard-private-parts-1377-sharding.jar index 21a144a4a..7ed9c6c8d 100644 Binary files a/settings/repository/edu.mit.broad/picard-private-parts-1348-sharding.jar and b/settings/repository/edu.mit.broad/picard-private-parts-1377-sharding.jar differ diff --git a/settings/repository/edu.mit.broad/picard-private-parts-1348-sharding.xml b/settings/repository/edu.mit.broad/picard-private-parts-1377-sharding.xml similarity index 55% rename from settings/repository/edu.mit.broad/picard-private-parts-1348-sharding.xml rename to settings/repository/edu.mit.broad/picard-private-parts-1377-sharding.xml index 3c45a435a..d434cb03d 100644 --- a/settings/repository/edu.mit.broad/picard-private-parts-1348-sharding.xml +++ b/settings/repository/edu.mit.broad/picard-private-parts-1377-sharding.xml @@ -1,3 +1,3 @@ - + diff --git a/settings/repository/net.sf/picard-1.17.373-sharding.xml b/settings/repository/net.sf/picard-1.17.373-sharding.xml deleted file mode 100644 index a11b12e62..000000000 --- a/settings/repository/net.sf/picard-1.17.373-sharding.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/net.sf/picard-1.17.373-sharding.jar b/settings/repository/net.sf/picard-1.19.395-sharding.jar similarity index 93% rename from settings/repository/net.sf/picard-1.17.373-sharding.jar rename to settings/repository/net.sf/picard-1.19.395-sharding.jar index 383271896..6da7e63ae 100644 Binary files a/settings/repository/net.sf/picard-1.17.373-sharding.jar and b/settings/repository/net.sf/picard-1.19.395-sharding.jar differ diff --git a/settings/repository/net.sf/picard-1.19.395-sharding.xml b/settings/repository/net.sf/picard-1.19.395-sharding.xml new file mode 100644 index 000000000..3608edd9c --- /dev/null +++ b/settings/repository/net.sf/picard-1.19.395-sharding.xml @@ -0,0 +1,3 @@ + + + diff --git a/settings/repository/net.sf/sam-1.17.373-sharding.jar b/settings/repository/net.sf/sam-1.19.395-sharding.jar similarity index 72% rename from settings/repository/net.sf/sam-1.17.373-sharding.jar rename to settings/repository/net.sf/sam-1.19.395-sharding.jar index 2eb8fca11..ebad98146 100644 Binary files a/settings/repository/net.sf/sam-1.17.373-sharding.jar and b/settings/repository/net.sf/sam-1.19.395-sharding.jar differ diff --git a/settings/repository/net.sf/sam-1.17.373-sharding.xml b/settings/repository/net.sf/sam-1.19.395-sharding.xml similarity index 52% rename from settings/repository/net.sf/sam-1.17.373-sharding.xml rename to settings/repository/net.sf/sam-1.19.395-sharding.xml index 128811159..9a5de6d8a 100644 --- a/settings/repository/net.sf/sam-1.17.373-sharding.xml +++ b/settings/repository/net.sf/sam-1.19.395-sharding.xml @@ -1,3 +1,3 @@ - +