A checkpoint commit of two BAM reading projects going on simultaneously. These two projects
are works in progress, and this checkin will provide a baseline against which to gauge improvements to both projects. Low-memory BAM protoshards (disabled by default): - Currently passing ValidatingPileupIntegrationTest. - Gets progressively slower throughout the traversal, but should run at least as fast as original implementation. - Uses 10+ file handles per BAM, but should use 3. BAM performance microbenchmark test system: - Currently tests performance of BAM reading using SAM-JDK vs. GATK - Tests do not hit all GATK performance hotspots. - New tests that require input data in a slightly different form are hard to implement. - Output of test results is not easily parseable (investigating Google Caliper for possible improvements). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5317 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ad1e4f47b1
commit
600f73cbd6
|
|
@ -25,6 +25,7 @@
|
||||||
package net.sf.samtools;
|
package net.sf.samtools;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -53,8 +54,8 @@ public class GATKBAMFileSpan extends BAMFileSpan {
|
||||||
* Create a new chunk list from the given list of chunks.
|
* Create a new chunk list from the given list of chunks.
|
||||||
* @param chunks Constituent chunks.
|
* @param chunks Constituent chunks.
|
||||||
*/
|
*/
|
||||||
public GATKBAMFileSpan(final List<GATKChunk> chunks) {
|
public GATKBAMFileSpan(final GATKChunk[] chunks) {
|
||||||
super(new ArrayList<Chunk>(chunks));
|
super(Arrays.<Chunk>asList(chunks));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -33,38 +33,102 @@ import java.util.List;
|
||||||
* override GATKBin and make it public.
|
* override GATKBin and make it public.
|
||||||
* TODO: Eliminate once we determine the final fate of the BAM index reading code.
|
* TODO: Eliminate once we determine the final fate of the BAM index reading code.
|
||||||
*/
|
*/
|
||||||
public class GATKBin extends Bin {
|
public class GATKBin implements Comparable<GATKBin> {
|
||||||
|
/**
|
||||||
|
* The reference sequence associated with this bin.
|
||||||
|
*/
|
||||||
|
private final int referenceSequence;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The number of this bin within the BAM file.
|
||||||
|
*/
|
||||||
|
private final int binNumber;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The chunks associated with this bin.
|
||||||
|
*/
|
||||||
|
private GATKChunk[] chunkList;
|
||||||
|
|
||||||
|
public GATKBin(Bin bin) {
|
||||||
|
this(bin.getReferenceSequence(),bin.getBinNumber());
|
||||||
|
}
|
||||||
|
|
||||||
public GATKBin(final int referenceSequence, final int binNumber) {
|
public GATKBin(final int referenceSequence, final int binNumber) {
|
||||||
super(referenceSequence,binNumber);
|
this.referenceSequence = referenceSequence;
|
||||||
|
this.binNumber = binNumber;
|
||||||
}
|
}
|
||||||
|
|
||||||
public GATKBin(final Bin bin) {
|
|
||||||
super(bin.getReferenceSequence(),bin.getBinNumber());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int getReferenceSequence() {
|
public int getReferenceSequence() {
|
||||||
return super.getReferenceSequence();
|
return referenceSequence;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public int getBinNumber() {
|
public int getBinNumber() {
|
||||||
return super.getBinNumber();
|
return binNumber;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<GATKChunk> getGATKChunkList() {
|
/**
|
||||||
List<GATKChunk> gatkChunks = new ArrayList<GATKChunk>();
|
* Convert this GATKBin to a normal bin, for processing with the standard BAM query interface.
|
||||||
for(Chunk chunk: getChunkList())
|
* @return A new Bin with the same reference sequence and bin number as this GATKBin.
|
||||||
gatkChunks.add(new GATKChunk(chunk));
|
*/
|
||||||
return gatkChunks;
|
public Bin toBin() {
|
||||||
}
|
return new Bin(referenceSequence,binNumber);
|
||||||
|
|
||||||
public void setGATKChunkList(List<GATKChunk> chunks) {
|
|
||||||
super.setChunkList(new ArrayList<Chunk>(chunks));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* See whether two bins are equal. If the ref seq and the bin number
|
||||||
|
* are equal, assume equality of the chunk list.
|
||||||
|
* @param other The other Bin to which to compare this.
|
||||||
|
* @return True if the two bins are equal. False otherwise.
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public boolean equals(Object other) {
|
||||||
return String.format("Bin %d in contig %d",getBinNumber(),getReferenceSequence());
|
if(other == null) return false;
|
||||||
|
if(!(other instanceof GATKBin)) return false;
|
||||||
|
|
||||||
|
GATKBin otherBin = (GATKBin)other;
|
||||||
|
return this.referenceSequence == otherBin.referenceSequence && this.binNumber == otherBin.binNumber;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute a hash code from the reference sequence and bin number.
|
||||||
|
* @return The hash code. Equal bins have equal hash codes; distinct bins may collide.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return ((Integer)referenceSequence).hashCode() ^ ((Integer)binNumber).hashCode();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compare two bins to see what ordering they should appear in.
|
||||||
|
* @param other Other bin to which this bin should be compared.
|
||||||
|
* @return A negative value if this < other, 0 if this == other, a positive value if this > other.
|
||||||
|
*/
|
||||||
|
public int compareTo(GATKBin other) {
|
||||||
|
if(other == null)
|
||||||
|
throw new ClassCastException("Cannot compare to a null object");
|
||||||
|
|
||||||
|
// Check the reference sequences first.
|
||||||
|
if(this.referenceSequence != other.referenceSequence)
|
||||||
|
return referenceSequence - other.referenceSequence;
|
||||||
|
|
||||||
|
// Then check the bin ordering.
|
||||||
|
return binNumber - other.binNumber;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the chunks associated with this bin
|
||||||
|
*/
|
||||||
|
public void setChunkList(GATKChunk[] list){
|
||||||
|
chunkList = list;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the list of chunks associated with this bin.
|
||||||
|
* @return the chunks in this bin. If no chunks are associated, an empty array will be returned.
|
||||||
|
*/
|
||||||
|
public GATKChunk[] getChunkList(){
|
||||||
|
if(chunkList == null)
|
||||||
|
return new GATKChunk[0];
|
||||||
|
return chunkList;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -179,7 +179,7 @@ public class GATKArgumentCollection {
|
||||||
* method.
|
* method.
|
||||||
* @return The default downsampling mechanism, or null if none exists.
|
* @return The default downsampling mechanism, or null if none exists.
|
||||||
*/
|
*/
|
||||||
public DownsamplingMethod getDefaultDownsamplingMethod() {
|
public static DownsamplingMethod getDefaultDownsamplingMethod() {
|
||||||
return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE,DEFAULT_DOWNSAMPLING_COVERAGE,null);
|
return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE,DEFAULT_DOWNSAMPLING_COVERAGE,null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,266 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
|
import net.sf.samtools.GATKBin;
|
||||||
|
import net.sf.samtools.GATKChunk;
|
||||||
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
import java.nio.channels.FileChannel;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.NoSuchElementException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates an 'index of the index' for a particular reference sequence
|
||||||
|
* within the BAM file, for easier whole-BAM-file traversal.
|
||||||
|
* file.
|
||||||
|
*/
|
||||||
|
public class BAMIndexBinIterator {
|
||||||
|
/**
|
||||||
|
* The source of index data.
|
||||||
|
*/
|
||||||
|
private final GATKBAMIndex index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The file storing the index data.
|
||||||
|
*/
|
||||||
|
private final File indexFile;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* File storing index metadata.
|
||||||
|
*/
|
||||||
|
private final File metaIndexFile;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reference sequence that temporary file is based on.
|
||||||
|
*/
|
||||||
|
private final int referenceSequence;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Size of a long in bytes.
|
||||||
|
*/
|
||||||
|
private static final int LONG_SIZE_IN_BYTES = Long.SIZE / 8;
|
||||||
|
|
||||||
|
public BAMIndexBinIterator(final GATKBAMIndex index, final File indexFile, final int referenceSequence) {
|
||||||
|
this.index = index;
|
||||||
|
this.indexFile = indexFile;
|
||||||
|
this.referenceSequence = referenceSequence;
|
||||||
|
|
||||||
|
index.seek(4);
|
||||||
|
|
||||||
|
final int sequenceCount = index.readInteger();
|
||||||
|
|
||||||
|
if (referenceSequence >= sequenceCount)
|
||||||
|
throw new ReviewedStingException(String.format("Reference sequence past end of genome; reference sequence = %d, sequence count = %d",referenceSequence,sequenceCount));
|
||||||
|
|
||||||
|
index.skipToSequence(referenceSequence);
|
||||||
|
|
||||||
|
int binCount = index.readInteger();
|
||||||
|
|
||||||
|
try {
|
||||||
|
metaIndexFile = File.createTempFile("bammetaindex."+referenceSequence,null);
|
||||||
|
metaIndexFile.deleteOnExit();
|
||||||
|
|
||||||
|
FileOutputStream metaIndex = new FileOutputStream(metaIndexFile);
|
||||||
|
FileChannel metaIndexChannel = metaIndex.getChannel();
|
||||||
|
|
||||||
|
// zero out the contents of the file. Arrays of primitives in java are always zeroed out by default.
|
||||||
|
byte[] emptyContents = new byte[GATKBAMIndex.MAX_BINS*(Long.SIZE/8)]; // byte array is zeroed out by default.
|
||||||
|
metaIndexChannel.write(ByteBuffer.wrap(emptyContents));
|
||||||
|
|
||||||
|
ByteBuffer binPositionBuffer = ByteBuffer.allocate(emptyContents.length);
|
||||||
|
binPositionBuffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||||
|
|
||||||
|
for (int binNumber = 0; binNumber < binCount; binNumber++) {
|
||||||
|
long position = index.position();
|
||||||
|
|
||||||
|
final int indexBin = index.readInteger();
|
||||||
|
|
||||||
|
metaIndexChannel.position(indexBin*LONG_SIZE_IN_BYTES);
|
||||||
|
binPositionBuffer.putLong(position);
|
||||||
|
binPositionBuffer.flip();
|
||||||
|
|
||||||
|
System.out.printf("Writing bin number %d to position %d: coordinate = %d%n",indexBin,indexBin*Long.SIZE*8,position);
|
||||||
|
|
||||||
|
metaIndexChannel.write(binPositionBuffer);
|
||||||
|
binPositionBuffer.flip();
|
||||||
|
|
||||||
|
final int nChunks = index.readInteger();
|
||||||
|
index.skipBytes(16 * nChunks);
|
||||||
|
}
|
||||||
|
|
||||||
|
metaIndexChannel.close();
|
||||||
|
metaIndex.close();
|
||||||
|
}
|
||||||
|
catch(IOException ex) {
|
||||||
|
throw new ReviewedStingException("Unable to write BAM metaindex",ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close() {
|
||||||
|
metaIndexFile.delete();
|
||||||
|
}
|
||||||
|
|
||||||
|
public CloseableIterator<GATKBin> getIteratorOverLevel(int level) {
|
||||||
|
return new LevelIterator(level);
|
||||||
|
}
|
||||||
|
|
||||||
|
private class LevelIterator implements CloseableIterator<GATKBin> {
|
||||||
|
/**
|
||||||
|
* The raw BAM index file with unordered bins.
|
||||||
|
*/
|
||||||
|
private final FileInputStream indexInputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The index metafile, with pointers to ordered bins.
|
||||||
|
*/
|
||||||
|
private final FileInputStream metaIndexInputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The first and last bins in the level.
|
||||||
|
*/
|
||||||
|
private final int firstBinNumber, lastBinNumber;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The current bin in the index.
|
||||||
|
*/
|
||||||
|
private int currentBinNumber;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Position of the most recent chunk data.
|
||||||
|
*/
|
||||||
|
private GATKBin nextBin = null;
|
||||||
|
|
||||||
|
public LevelIterator(final int level) {
|
||||||
|
try {
|
||||||
|
indexInputStream = new FileInputStream(indexFile);
|
||||||
|
}
|
||||||
|
catch(IOException ex) {
|
||||||
|
throw new ReviewedStingException("Unable to open index file for reading");
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
metaIndexInputStream = new FileInputStream(metaIndexFile);
|
||||||
|
}
|
||||||
|
catch(IOException ex) {
|
||||||
|
throw new ReviewedStingException("Unable to open index metafile for reading");
|
||||||
|
}
|
||||||
|
|
||||||
|
firstBinNumber = GATKBAMIndex.getFirstBinInLevel(level);
|
||||||
|
lastBinNumber = firstBinNumber + index.getLevelSize(level) - 1;
|
||||||
|
|
||||||
|
currentBinNumber = firstBinNumber - 1;
|
||||||
|
advance();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close() {
|
||||||
|
try {
|
||||||
|
indexInputStream.close();
|
||||||
|
}
|
||||||
|
catch(IOException ex) {
|
||||||
|
throw new ReviewedStingException("Unable to close index file.");
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
metaIndexInputStream.close();
|
||||||
|
}
|
||||||
|
catch(IOException ex) {
|
||||||
|
throw new ReviewedStingException("Unable to close index metafile");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasNext() {
|
||||||
|
return nextBin != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public GATKBin next() {
|
||||||
|
if(!hasNext())
|
||||||
|
throw new NoSuchElementException("Out of elements in BAMIndexBinIterator");
|
||||||
|
GATKBin currentPosition = nextBin;
|
||||||
|
advance();
|
||||||
|
return currentPosition;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void remove() { throw new UnsupportedOperationException("Cannot remove from a LevelIterator"); }
|
||||||
|
|
||||||
|
private void advance() {
|
||||||
|
ByteBuffer indexFilePositionDecoder = ByteBuffer.allocate(LONG_SIZE_IN_BYTES*2);
|
||||||
|
indexFilePositionDecoder.order(ByteOrder.LITTLE_ENDIAN);
|
||||||
|
|
||||||
|
nextBin = null;
|
||||||
|
try {
|
||||||
|
indexFilePositionDecoder.limit(LONG_SIZE_IN_BYTES);
|
||||||
|
while(nextBin == null && currentBinNumber < lastBinNumber) {
|
||||||
|
currentBinNumber++;
|
||||||
|
metaIndexInputStream.getChannel().position(currentBinNumber*LONG_SIZE_IN_BYTES);
|
||||||
|
metaIndexInputStream.getChannel().read(indexFilePositionDecoder);
|
||||||
|
indexFilePositionDecoder.flip();
|
||||||
|
long currentPosition = indexFilePositionDecoder.getLong();
|
||||||
|
indexFilePositionDecoder.flip();
|
||||||
|
|
||||||
|
if(currentPosition != 0) {
|
||||||
|
indexInputStream.getChannel().position(currentPosition);
|
||||||
|
indexInputStream.getChannel().read(indexFilePositionDecoder);
|
||||||
|
|
||||||
|
indexFilePositionDecoder.flip();
|
||||||
|
int binNumber = indexFilePositionDecoder.getInt();
|
||||||
|
if(binNumber != currentBinNumber)
|
||||||
|
throw new ReviewedStingException("Index file and metaindex file are out of sync.");
|
||||||
|
|
||||||
|
int nChunks = indexFilePositionDecoder.getInt();
|
||||||
|
GATKChunk[] chunks = new GATKChunk[nChunks];
|
||||||
|
|
||||||
|
indexFilePositionDecoder.limit(LONG_SIZE_IN_BYTES*2);
|
||||||
|
indexFilePositionDecoder.clear();
|
||||||
|
|
||||||
|
for (int ci = 0; ci < nChunks; ci++) {
|
||||||
|
indexInputStream.getChannel().read(indexFilePositionDecoder);
|
||||||
|
|
||||||
|
indexFilePositionDecoder.flip();
|
||||||
|
final long chunkBegin = indexFilePositionDecoder.getLong();
|
||||||
|
final long chunkEnd = indexFilePositionDecoder.getLong();
|
||||||
|
chunks[ci] = new GATKChunk(chunkBegin, chunkEnd);
|
||||||
|
|
||||||
|
indexFilePositionDecoder.flip();
|
||||||
|
}
|
||||||
|
|
||||||
|
nextBin = new GATKBin(referenceSequence,binNumber);
|
||||||
|
nextBin.setChunkList(chunks);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch(IOException ex) {
|
||||||
|
throw new ReviewedStingException("Unable to close index metafile");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -100,7 +100,7 @@ class BAMIndexContent {
|
||||||
List<GATKChunk> allChunks = new ArrayList<GATKChunk>();
|
List<GATKChunk> allChunks = new ArrayList<GATKChunk>();
|
||||||
for (GATKBin b : mBinList)
|
for (GATKBin b : mBinList)
|
||||||
if (b.getChunkList() != null) {
|
if (b.getChunkList() != null) {
|
||||||
allChunks.addAll(b.getGATKChunkList());
|
allChunks.addAll(Arrays.asList(b.getChunkList()));
|
||||||
}
|
}
|
||||||
return Collections.unmodifiableList(allChunks);
|
return Collections.unmodifiableList(allChunks);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,198 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
|
import net.sf.picard.util.PeekableIterator;
|
||||||
|
import net.sf.samtools.Bin;
|
||||||
|
import net.sf.samtools.GATKBin;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.NoSuchElementException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents a tree of overlapping bins in a single
|
||||||
|
* BAM index.
|
||||||
|
*/
|
||||||
|
public class BinTree {
|
||||||
|
/**
|
||||||
|
* The BAM index from which this bin data is sourced.
|
||||||
|
*/
|
||||||
|
private final GATKBAMIndex index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The bins in this tree, organized by level.
|
||||||
|
*/
|
||||||
|
private final GATKBin[] bins;
|
||||||
|
|
||||||
|
public BinTree(GATKBAMIndex index,final GATKBin[] bins) {
|
||||||
|
this.index = index;
|
||||||
|
this.bins = bins;
|
||||||
|
}
|
||||||
|
|
||||||
|
public GATKBin getLowestLevelBin() {
|
||||||
|
for(int i = bins.length-1; i >= 0; i--) {
|
||||||
|
if(bins[i] != null)
|
||||||
|
return bins[i];
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieve the bins from the bin tree.
|
||||||
|
* @return list of bins.
|
||||||
|
*/
|
||||||
|
public GATKBin[] getBins() {
|
||||||
|
return bins;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the number of bins in a given bin list.
|
||||||
|
* @return Number of bins in the list.
|
||||||
|
*/
|
||||||
|
public int size() {
|
||||||
|
return bins.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks overlap between this bin tree and other bin trees.
|
||||||
|
* @param position the position over which to detect overlap.
|
||||||
|
* @return True if the segment overlaps. False otherwise.
|
||||||
|
*/
|
||||||
|
public boolean overlaps(final GenomeLoc position) {
|
||||||
|
for(GATKBin gatkBin: bins) {
|
||||||
|
if(gatkBin == null)
|
||||||
|
continue;
|
||||||
|
Bin bin = new Bin(gatkBin.getReferenceSequence(),gatkBin.getBinNumber());
|
||||||
|
// Overlap occurs when the position is not disjoint with the bin boundaries.
|
||||||
|
if(!(position.getStop() < index.getFirstLocusInBin(bin) || position.getStart() > index.getLastLocusInBin(bin)))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Iterate through all bin trees in sequence, from those covering base 1 to those covering MAX_BINS.
|
||||||
|
*/
|
||||||
|
class BinTreeIterator implements Iterator<BinTree> {
|
||||||
|
/**
|
||||||
|
* The index over which to iterate.
|
||||||
|
*/
|
||||||
|
private final GATKBAMIndex index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Iterators over each individual level.
|
||||||
|
*/
|
||||||
|
private final PeekableIterator<GATKBin>[] levelIterators;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The next bin tree to be returned.
|
||||||
|
*/
|
||||||
|
private BinTree nextBinTree;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Each iteration through the bin tree has a corresponding lowest level. Make sure
|
||||||
|
* every lowest-level bin is covered, whether that bin is present or not.
|
||||||
|
*/
|
||||||
|
private int currentBinInLowestLevel;
|
||||||
|
|
||||||
|
public BinTreeIterator(final GATKBAMIndex index, final File indexFile, final int referenceSequence) {
|
||||||
|
this.index = index;
|
||||||
|
|
||||||
|
BAMIndexBinIterator binIterator = new BAMIndexBinIterator(index,indexFile,referenceSequence);
|
||||||
|
levelIterators = new PeekableIterator[GATKBAMIndex.getNumIndexLevels()];
|
||||||
|
for(int level = 0; level < GATKBAMIndex.getNumIndexLevels(); level++)
|
||||||
|
levelIterators[level] = new PeekableIterator<GATKBin>(binIterator.getIteratorOverLevel(level));
|
||||||
|
|
||||||
|
// Set the current bin to one less that the first bin in the sequence. advance() will push it
|
||||||
|
// ahead to the first bin in the lowest level.
|
||||||
|
currentBinInLowestLevel = GATKBAMIndex.getFirstBinInLevel(GATKBAMIndex.getNumIndexLevels()-1) - 1;
|
||||||
|
|
||||||
|
advance();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasNext() {
|
||||||
|
return nextBinTree != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the next BinTree in the level.
|
||||||
|
* @return Next BinTree in sequence.
|
||||||
|
*/
|
||||||
|
public BinTree next() {
|
||||||
|
if(!hasNext())
|
||||||
|
throw new NoSuchElementException("BinTreeIterator is out of elements");
|
||||||
|
BinTree currentBinTree = nextBinTree;
|
||||||
|
advance();
|
||||||
|
return currentBinTree;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bring the bin tree ahead to the next overlapping structure.
|
||||||
|
*/
|
||||||
|
private void advance() {
|
||||||
|
final int lowestLevel = GATKBAMIndex.getNumIndexLevels()-1;
|
||||||
|
final int firstBinInLowestLevel = GATKBAMIndex.getFirstBinInLevel(lowestLevel);
|
||||||
|
final int binsInLowestLevel = index.getLevelSize(lowestLevel);
|
||||||
|
|
||||||
|
currentBinInLowestLevel++;
|
||||||
|
|
||||||
|
GATKBin[] bins = new GATKBin[GATKBAMIndex.getNumIndexLevels()];
|
||||||
|
nextBinTree = null;
|
||||||
|
while(nextBinTree == null) {
|
||||||
|
for(int level = lowestLevel; level >= 0; level--) {
|
||||||
|
if(!levelIterators[level].hasNext())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
final int firstBinInThisLevel = GATKBAMIndex.getFirstBinInLevel(level);
|
||||||
|
final int binsInThisLevel = index.getLevelSize(level);
|
||||||
|
final int currentBinInThisLevel = ((currentBinInLowestLevel-firstBinInLowestLevel)*binsInThisLevel/binsInLowestLevel) + firstBinInThisLevel;
|
||||||
|
|
||||||
|
while(levelIterators[level].hasNext() && levelIterators[level].peek().getBinNumber() < currentBinInThisLevel)
|
||||||
|
levelIterators[level].next();
|
||||||
|
|
||||||
|
if(levelIterators[level].hasNext() && levelIterators[level].peek().getBinNumber() == currentBinInThisLevel)
|
||||||
|
bins[level] = levelIterators[level].peek();
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int level = 0; level <= lowestLevel; level++) {
|
||||||
|
if(bins[level] != null) {
|
||||||
|
nextBinTree = new BinTree(index,bins);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove unsupported.
|
||||||
|
*/
|
||||||
|
public void remove() {
|
||||||
|
throw new UnsupportedOperationException("Cannot remove elements from a BinTreeIterator");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -61,6 +61,10 @@ class FilePointer {
|
||||||
this.isRegionUnmapped = false;
|
this.isRegionUnmapped = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public FilePointer(final String referenceSequence) {
|
||||||
|
this(referenceSequence,null);
|
||||||
|
}
|
||||||
|
|
||||||
public void addLocation(GenomeLoc location) {
|
public void addLocation(GenomeLoc location) {
|
||||||
locations.add(location);
|
locations.add(location);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -103,6 +103,14 @@ public class GATKBAMIndex implements BAMIndex, BrowseableBAMIndex {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the file backing this index.
|
||||||
|
* @return The index file.
|
||||||
|
*/
|
||||||
|
public File getIndexFile() {
|
||||||
|
return mFile;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the number of levels employed by this index.
|
* Get the number of levels employed by this index.
|
||||||
* @return Number of levels in this index.
|
* @return Number of levels in this index.
|
||||||
|
|
@ -127,7 +135,7 @@ public class GATKBAMIndex implements BAMIndex, BrowseableBAMIndex {
|
||||||
*/
|
*/
|
||||||
public int getLevelSize(final int levelNumber) {
|
public int getLevelSize(final int levelNumber) {
|
||||||
if(levelNumber == getNumIndexLevels()-1)
|
if(levelNumber == getNumIndexLevels()-1)
|
||||||
return MAX_BINS-LEVEL_STARTS[levelNumber];
|
return MAX_BINS-LEVEL_STARTS[levelNumber]-1;
|
||||||
else
|
else
|
||||||
return LEVEL_STARTS[levelNumber+1]-LEVEL_STARTS[levelNumber];
|
return LEVEL_STARTS[levelNumber+1]-LEVEL_STARTS[levelNumber];
|
||||||
}
|
}
|
||||||
|
|
@ -272,7 +280,7 @@ public class GATKBAMIndex implements BAMIndex, BrowseableBAMIndex {
|
||||||
|
|
||||||
List<GATKChunk> chunkList = new ArrayList<GATKChunk>();
|
List<GATKChunk> chunkList = new ArrayList<GATKChunk>();
|
||||||
for(GATKBin bin: bins) {
|
for(GATKBin bin: bins) {
|
||||||
for(GATKChunk chunk: bin.getGATKChunkList())
|
for(GATKChunk chunk: bin.getChunkList())
|
||||||
chunkList.add(chunk.clone());
|
chunkList.add(chunk.clone());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -281,7 +289,7 @@ public class GATKBAMIndex implements BAMIndex, BrowseableBAMIndex {
|
||||||
}
|
}
|
||||||
|
|
||||||
chunkList = optimizeChunkList(chunkList,queryResults.getLinearIndex().getMinimumOffset(startPos));
|
chunkList = optimizeChunkList(chunkList,queryResults.getLinearIndex().getMinimumOffset(startPos));
|
||||||
return new GATKBAMFileSpan(chunkList);
|
return new GATKBAMFileSpan(chunkList.toArray(new GATKChunk[chunkList.size()]));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -322,13 +330,13 @@ public class GATKBAMIndex implements BAMIndex, BrowseableBAMIndex {
|
||||||
|
|
||||||
List<GATKChunk> chunkList = new ArrayList<GATKChunk>();
|
List<GATKChunk> chunkList = new ArrayList<GATKChunk>();
|
||||||
for(GATKBin coveringBin: binTree) {
|
for(GATKBin coveringBin: binTree) {
|
||||||
for(GATKChunk chunk: coveringBin.getGATKChunkList())
|
for(GATKChunk chunk: coveringBin.getChunkList())
|
||||||
chunkList.add(chunk.clone());
|
chunkList.add(chunk.clone());
|
||||||
}
|
}
|
||||||
|
|
||||||
final int start = getFirstLocusInBin(bin);
|
final int start = getFirstLocusInBin(bin);
|
||||||
chunkList = optimizeChunkList(chunkList,indexQuery.getLinearIndex().getMinimumOffset(start));
|
chunkList = optimizeChunkList(chunkList,indexQuery.getLinearIndex().getMinimumOffset(start));
|
||||||
return new GATKBAMFileSpan(chunkList);
|
return new GATKBAMFileSpan(chunkList.toArray(new GATKChunk[chunkList.size()]));
|
||||||
}
|
}
|
||||||
|
|
||||||
public GATKBAMFileSpan getContentsOfBin(final Bin bin) {
|
public GATKBAMFileSpan getContentsOfBin(final Bin bin) {
|
||||||
|
|
@ -344,7 +352,7 @@ public class GATKBAMIndex implements BAMIndex, BrowseableBAMIndex {
|
||||||
|
|
||||||
GATKBin queriedBin = indexQuery.getBins().getBin(gatkBin.getBinNumber());
|
GATKBin queriedBin = indexQuery.getBins().getBin(gatkBin.getBinNumber());
|
||||||
|
|
||||||
return queriedBin != null ? new GATKBAMFileSpan(queriedBin.getGATKChunkList()) : null;
|
return queriedBin != null ? new GATKBAMFileSpan(queriedBin.getChunkList()) : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -420,8 +428,7 @@ public class GATKBAMIndex implements BAMIndex, BrowseableBAMIndex {
|
||||||
skipBytes(16 * nChunks);
|
skipBytes(16 * nChunks);
|
||||||
}
|
}
|
||||||
GATKBin bin = new GATKBin(referenceSequence, indexBin);
|
GATKBin bin = new GATKBin(referenceSequence, indexBin);
|
||||||
bin.setGATKChunkList(chunks);
|
bin.setChunkList(chunks.toArray(new GATKChunk[chunks.size()]));
|
||||||
bin.setLastChunk(lastChunk);
|
|
||||||
bins[indexBin] = bin;
|
bins[indexBin] = bin;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -560,7 +567,7 @@ public class GATKBAMIndex implements BAMIndex, BrowseableBAMIndex {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void skipToSequence(final int sequenceIndex) {
|
protected void skipToSequence(final int sequenceIndex) {
|
||||||
for (int i = 0; i < sequenceIndex; i++) {
|
for (int i = 0; i < sequenceIndex; i++) {
|
||||||
// System.out.println("# Sequence TID: " + i);
|
// System.out.println("# Sequence TID: " + i);
|
||||||
final int nBins = readInteger();
|
final int nBins = readInteger();
|
||||||
|
|
@ -581,20 +588,23 @@ public class GATKBAMIndex implements BAMIndex, BrowseableBAMIndex {
|
||||||
mFileBuffer.get(bytes);
|
mFileBuffer.get(bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
private int readInteger() {
|
protected int readInteger() {
|
||||||
return mFileBuffer.getInt();
|
return mFileBuffer.getInt();
|
||||||
}
|
}
|
||||||
|
|
||||||
private long readLong() {
|
protected long readLong() {
|
||||||
return mFileBuffer.getLong();
|
return mFileBuffer.getLong();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void skipBytes(final int count) {
|
protected void skipBytes(final int count) {
|
||||||
mFileBuffer.position(mFileBuffer.position() + count);
|
mFileBuffer.position(mFileBuffer.position() + count);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void seek(final int position) {
|
protected void seek(final int position) {
|
||||||
mFileBuffer.position(position);
|
mFileBuffer.position(position);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected long position() {
|
||||||
|
return mFileBuffer.position();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -158,8 +158,8 @@ public class IntervalSharder {
|
||||||
if(!binIterator.hasNext())
|
if(!binIterator.hasNext())
|
||||||
break;
|
break;
|
||||||
|
|
||||||
int locationStart = (int)location.getStart();
|
int locationStart = location.getStart();
|
||||||
final int locationStop = (int)location.getStop();
|
final int locationStop = location.getStop();
|
||||||
|
|
||||||
// Advance to first bin.
|
// Advance to first bin.
|
||||||
while(binIterator.peek().stop < locationStart)
|
while(binIterator.peek().stop < locationStart)
|
||||||
|
|
|
||||||
|
|
@ -83,7 +83,10 @@ public class LocusShardStrategy implements ShardStrategy {
|
||||||
else
|
else
|
||||||
intervals = locations;
|
intervals = locations;
|
||||||
|
|
||||||
this.filePointerIterator = IntervalSharder.shardIntervals(this.reads,intervals);
|
if(SAMDataSource.TRY_LOW_MEMORY_SHARDING)
|
||||||
|
this.filePointerIterator = new LowMemoryIntervalSharder(this.reads,intervals);
|
||||||
|
else
|
||||||
|
this.filePointerIterator = IntervalSharder.shardIntervals(this.reads,intervals);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
final int maxShardSize = 100000;
|
final int maxShardSize = 100000;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,193 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
|
import net.sf.picard.util.PeekableIterator;
|
||||||
|
import net.sf.samtools.GATKBAMFileSpan;
|
||||||
|
import net.sf.samtools.GATKBin;
|
||||||
|
import net.sf.samtools.GATKChunk;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.NoSuchElementException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Assign intervals to the most appropriate blocks, keeping as little as possible in memory at once.
|
||||||
|
*/
|
||||||
|
public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
|
||||||
|
private static Logger logger = Logger.getLogger(IntervalSharder.class);
|
||||||
|
|
||||||
|
private final SAMDataSource dataSource;
|
||||||
|
|
||||||
|
private final GenomeLocSortedSet loci;
|
||||||
|
|
||||||
|
private final PeekableIterator<GenomeLoc> locusIterator;
|
||||||
|
|
||||||
|
private GenomeLoc currentLocus;
|
||||||
|
|
||||||
|
private FilePointer nextFilePointer = null;
|
||||||
|
|
||||||
|
public LowMemoryIntervalSharder(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
|
||||||
|
this.dataSource = dataSource;
|
||||||
|
this.loci = loci;
|
||||||
|
locusIterator = new PeekableIterator<GenomeLoc>(loci.iterator());
|
||||||
|
if(locusIterator.hasNext())
|
||||||
|
currentLocus = locusIterator.next();
|
||||||
|
advance();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasNext() {
|
||||||
|
return nextFilePointer != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public FilePointer next() {
|
||||||
|
if(!hasNext())
|
||||||
|
throw new NoSuchElementException("No next element available in interval sharder");
|
||||||
|
FilePointer currentFilePointer = nextFilePointer;
|
||||||
|
advance();
|
||||||
|
return currentFilePointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void remove() {
|
||||||
|
throw new UnsupportedOperationException("Unable to remove FilePointers from an IntervalSharder");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void advance() {
|
||||||
|
if(loci.isEmpty())
|
||||||
|
return;
|
||||||
|
|
||||||
|
nextFilePointer = null;
|
||||||
|
while(nextFilePointer == null && currentLocus != null) {
|
||||||
|
nextFilePointer = new FilePointer(currentLocus.getContig());
|
||||||
|
|
||||||
|
int coveredRegionStart = 1;
|
||||||
|
int coveredRegionStop = Integer.MAX_VALUE;
|
||||||
|
GenomeLoc coveredRegion = null;
|
||||||
|
|
||||||
|
for(SAMReaderID reader: dataSource.getReaderIDs()) {
|
||||||
|
GATKBAMIndex index = (GATKBAMIndex)dataSource.getIndex(reader);
|
||||||
|
BinTree binTree = getNextOverlappingBinTree((GATKBAMIndex)dataSource.getIndex(reader),currentLocus);
|
||||||
|
if(binTree != null) {
|
||||||
|
coveredRegionStart = Math.max(coveredRegionStart,index.getFirstLocusInBin(binTree.getLowestLevelBin().toBin()));
|
||||||
|
coveredRegionStop = Math.min(coveredRegionStop,index.getLastLocusInBin(binTree.getLowestLevelBin().toBin()));
|
||||||
|
coveredRegion = loci.getGenomeLocParser().createGenomeLoc(currentLocus.getContig(),coveredRegionStart,coveredRegionStop);
|
||||||
|
|
||||||
|
GATKBAMFileSpan fileSpan = generateFileSpan(index,binTree,currentLocus);
|
||||||
|
nextFilePointer.addFileSpans(reader,fileSpan);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Define the initial range of the file pointer, aka the region where the locus currently being processed intersects the BAM list.
|
||||||
|
GenomeLoc initialLocation = currentLocus.intersect(coveredRegion);
|
||||||
|
nextFilePointer.addLocation(initialLocation);
|
||||||
|
|
||||||
|
// See whether the BAM regions discovered overlap the next set of intervals in the interval list. If so, include every overlapping interval.
|
||||||
|
if(!nextFilePointer.locations.isEmpty()) {
|
||||||
|
while(locusIterator.hasNext() && locusIterator.peek().overlapsP(coveredRegion)) {
|
||||||
|
currentLocus = locusIterator.next();
|
||||||
|
nextFilePointer.addLocation(locusIterator.next());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Chop off the uncovered portion of the locus. Since we know that the covered region overlaps the current locus,
|
||||||
|
// we can simplify the interval creation process to the end of the covered region to the stop of the given interval.
|
||||||
|
if(coveredRegionStop < currentLocus.getStop())
|
||||||
|
currentLocus = loci.getGenomeLocParser().createGenomeLoc(currentLocus.getContig(),coveredRegionStop+1,currentLocus.getStop());
|
||||||
|
else if(locusIterator.hasNext())
|
||||||
|
currentLocus = locusIterator.next();
|
||||||
|
else
|
||||||
|
currentLocus = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The last reference sequence processed by this iterator.
|
||||||
|
*/
|
||||||
|
private int lastReferenceSequenceLoaded = -1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The stateful iterator used to progress through the genoem.
|
||||||
|
*/
|
||||||
|
private PeekableIterator<BinTree> binTreeIterator = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the next overlapping tree of bins associated with the given BAM file.
|
||||||
|
* @param index BAM index representation.
|
||||||
|
* @param locus Locus for which to grab the bin tree, if available.
|
||||||
|
* @return The BinTree overlapping the given locus.
|
||||||
|
*/
|
||||||
|
private BinTree getNextOverlappingBinTree(final GATKBAMIndex index, final GenomeLoc locus) {
|
||||||
|
// Stale reference sequence or first invocation. (Re)create the binTreeIterator.
|
||||||
|
if(locus.getContigIndex() != lastReferenceSequenceLoaded) {
|
||||||
|
if(binTreeIterator != null)
|
||||||
|
binTreeIterator.close();
|
||||||
|
lastReferenceSequenceLoaded = locus.getContigIndex();
|
||||||
|
binTreeIterator = new PeekableIterator<BinTree>(new BinTreeIterator(index,index.getIndexFile(),locus.getContigIndex()));
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!binTreeIterator.hasNext())
|
||||||
|
return null;
|
||||||
|
|
||||||
|
BinTree binTree = binTreeIterator.peek();
|
||||||
|
while(index.getLastLocusInBin(binTree.getLowestLevelBin().toBin()) < locus.getStart()) {
|
||||||
|
binTreeIterator.next(); // Before the point of interest. Consume this one.
|
||||||
|
binTree = binTreeIterator.peek();
|
||||||
|
}
|
||||||
|
|
||||||
|
if(binTree.overlaps(locus)) {
|
||||||
|
binTreeIterator.next();
|
||||||
|
return binTree;
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts a bin list to a file span, trimmed based on the linear index and with overlapping regions removed.
|
||||||
|
* @param index BAM index.
|
||||||
|
* @param binTree Tree of data found to overlap the region. binTree.overlaps(initialRegion) must return true.
|
||||||
|
* @param initialRegion The region to employ when trimming the linear index.
|
||||||
|
* @return File span mapping to given region.
|
||||||
|
*/
|
||||||
|
private GATKBAMFileSpan generateFileSpan(final GATKBAMIndex index, final BinTree binTree, final GenomeLoc initialRegion) {
|
||||||
|
List<GATKChunk> chunks = new ArrayList<GATKChunk>(binTree.size());
|
||||||
|
for(GATKBin bin: binTree.getBins()) {
|
||||||
|
if(bin == null)
|
||||||
|
continue;
|
||||||
|
chunks.addAll(Arrays.asList(bin.getChunkList()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Optimize the chunk list with a linear index optimization
|
||||||
|
chunks = index.optimizeChunkList(chunks,index.getLinearIndex(initialRegion.getContigIndex()).getMinimumOffset(initialRegion.getStart()));
|
||||||
|
|
||||||
|
return new GATKBAMFileSpan(chunks.toArray(new GATKChunk[chunks.size()]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -46,6 +46,8 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.lang.reflect.InvocationTargetException;
|
||||||
|
import java.lang.reflect.Method;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -79,6 +81,11 @@ public class SAMDataSource {
|
||||||
*/
|
*/
|
||||||
private final SAMFileReader.ValidationStringency validationStringency;
|
private final SAMFileReader.ValidationStringency validationStringency;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Store BAM indices for each reader present.
|
||||||
|
*/
|
||||||
|
private final Map<SAMReaderID,GATKBAMIndex> bamIndices = new HashMap<SAMReaderID,GATKBAMIndex>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* How far along is each reader?
|
* How far along is each reader?
|
||||||
*/
|
*/
|
||||||
|
|
@ -121,6 +128,8 @@ public class SAMDataSource {
|
||||||
*/
|
*/
|
||||||
private final SAMResourcePool resourcePool;
|
private final SAMResourcePool resourcePool;
|
||||||
|
|
||||||
|
static final boolean TRY_LOW_MEMORY_SHARDING = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new SAM data source given the supplied read metadata.
|
* Create a new SAM data source given the supplied read metadata.
|
||||||
* @param samFiles list of reads files.
|
* @param samFiles list of reads files.
|
||||||
|
|
@ -272,6 +281,17 @@ public class SAMDataSource {
|
||||||
originalToMergedReadGroupMappings.put(id,mappingToMerged);
|
originalToMergedReadGroupMappings.put(id,mappingToMerged);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(TRY_LOW_MEMORY_SHARDING) {
|
||||||
|
for(SAMReaderID id: readerIDs) {
|
||||||
|
File indexFile = findIndexFile(id.samFile);
|
||||||
|
if(indexFile != null) {
|
||||||
|
SAMSequenceDictionary sequenceDictionary = readers.getReader(id).getFileHeader().getSequenceDictionary();
|
||||||
|
GATKBAMIndex index = new GATKBAMIndex(indexFile,sequenceDictionary);
|
||||||
|
bamIndices.put(id,index);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
resourcePool.releaseReaders(readers);
|
resourcePool.releaseReaders(readers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -366,14 +386,18 @@ public class SAMDataSource {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* True if all readers have an index.
|
* True if all readers have an index.
|
||||||
* @return
|
* @return True if all readers have an index.
|
||||||
*/
|
*/
|
||||||
public boolean hasIndex() {
|
public boolean hasIndex() {
|
||||||
for(SAMFileReader reader: resourcePool.getReadersWithoutLocking()) {
|
if(TRY_LOW_MEMORY_SHARDING)
|
||||||
if(!reader.hasIndex())
|
return readerIDs.size() == bamIndices.size();
|
||||||
return false;
|
else {
|
||||||
|
for(SAMFileReader reader: resourcePool.getReadersWithoutLocking()) {
|
||||||
|
if(!reader.hasIndex())
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -382,8 +406,12 @@ public class SAMDataSource {
|
||||||
* @return The index. Will preload the index if necessary.
|
* @return The index. Will preload the index if necessary.
|
||||||
*/
|
*/
|
||||||
public BrowseableBAMIndex getIndex(final SAMReaderID id) {
|
public BrowseableBAMIndex getIndex(final SAMReaderID id) {
|
||||||
SAMReaders readers = resourcePool.getReadersWithoutLocking();
|
if(TRY_LOW_MEMORY_SHARDING)
|
||||||
return readers.getReader(id).getBrowseableIndex();
|
return bamIndices.get(id);
|
||||||
|
else {
|
||||||
|
SAMReaders readers = resourcePool.getReadersWithoutLocking();
|
||||||
|
return readers.getReader(id).getBrowseableIndex();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -701,11 +729,10 @@ public class SAMDataSource {
|
||||||
for(SAMReaderID readerID: readerIDs) {
|
for(SAMReaderID readerID: readerIDs) {
|
||||||
SAMFileReader reader = new SAMFileReader(readerID.samFile);
|
SAMFileReader reader = new SAMFileReader(readerID.samFile);
|
||||||
reader.enableFileSource(true);
|
reader.enableFileSource(true);
|
||||||
reader.enableIndexCaching(true);
|
if(!TRY_LOW_MEMORY_SHARDING)
|
||||||
|
reader.enableIndexCaching(true);
|
||||||
reader.setValidationStringency(validationStringency);
|
reader.setValidationStringency(validationStringency);
|
||||||
|
|
||||||
// If no read group is present, hallucinate one.
|
|
||||||
// TODO: Straw poll to see whether this is really required.
|
|
||||||
final SAMFileHeader header = reader.getFileHeader();
|
final SAMFileHeader header = reader.getFileHeader();
|
||||||
logger.debug(String.format("Sort order is: " + header.getSortOrder()));
|
logger.debug(String.format("Sort order is: " + header.getSortOrder()));
|
||||||
|
|
||||||
|
|
@ -944,6 +971,37 @@ public class SAMDataSource {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Locates the index file alongside the given BAM, if present.
|
||||||
|
* TODO: This is currently a hatchet job that reaches into Picard and pulls out its index file locator.  Replace with something more permanent.
|
||||||
|
* @param bamFile The data file to use.
|
||||||
|
* @return A File object if the index file is present; null otherwise.
|
||||||
|
*/
|
||||||
|
private File findIndexFile(File bamFile) {
|
||||||
|
File indexFile;
|
||||||
|
|
||||||
|
try {
|
||||||
|
Class bamFileReaderClass = Class.forName("net.sf.samtools.BAMFileReader");
|
||||||
|
Method indexFileLocator = bamFileReaderClass.getDeclaredMethod("findIndexFile",File.class);
|
||||||
|
indexFileLocator.setAccessible(true);
|
||||||
|
indexFile = (File)indexFileLocator.invoke(null,bamFile);
|
||||||
|
}
|
||||||
|
catch(ClassNotFoundException ex) {
|
||||||
|
throw new ReviewedStingException("Unable to locate BAMFileReader class, used to check for index files");
|
||||||
|
}
|
||||||
|
catch(NoSuchMethodException ex) {
|
||||||
|
throw new ReviewedStingException("Unable to locate Picard index file locator.");
|
||||||
|
}
|
||||||
|
catch(IllegalAccessException ex) {
|
||||||
|
throw new ReviewedStingException("Unable to access Picard index file locator.");
|
||||||
|
}
|
||||||
|
catch(InvocationTargetException ex) {
|
||||||
|
throw new ReviewedStingException("Unable to invoke Picard index file locator.");
|
||||||
|
}
|
||||||
|
|
||||||
|
return indexFile;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,143 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMFileReader;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||||
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
|
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
||||||
|
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||||
|
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Basic suite for testing idealized and actual performance of read processing.
|
||||||
|
*/
|
||||||
|
public class BAMProcessingPerformanceMeter extends CommandLineProgram {
|
||||||
|
@Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = true)
|
||||||
|
File samFile;
|
||||||
|
|
||||||
|
@Input(fullName = "reference_file", shortName="R", doc = "Associated FASTA sequence", required = true)
|
||||||
|
File referenceFile;
|
||||||
|
|
||||||
|
@Argument(fullName="test_repetitions", shortName = "test_reps", doc="Number of times to repeat each test", required = false)
|
||||||
|
int testRepetitions = 5;
|
||||||
|
|
||||||
|
@Argument(fullName="print_frequency", shortName = "pf", doc="Print cumulative time after x # reads", required = false)
|
||||||
|
int printFrequency = 100000;
|
||||||
|
|
||||||
|
private void testBAMFileProcessingThroughput(ReadProcessor readProcessor) {
|
||||||
|
readProcessor.execute(samFile,referenceFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int execute() {
|
||||||
|
for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new NoAdditionalProcessing(this));
|
||||||
|
for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new IterateOverEachBase(this));
|
||||||
|
for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new IterateOverCigarString(this));
|
||||||
|
for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new ExtractTag(this,"OQ"));
|
||||||
|
for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new InvokeSamLocusIterator(this));
|
||||||
|
for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new InvokeLocusIteratorByState(this, GATKArgumentCollection.getDefaultDownsamplingMethod()));
|
||||||
|
for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new InvokeLocusIteratorByState(this, DownsamplingMethod.NONE));
|
||||||
|
GATKWalkerInvoker countReadsInvoker = new GATKWalkerInvoker(this);
|
||||||
|
CountReadsPerformanceWalker countReadsWalker = new CountReadsPerformanceWalker(countReadsInvoker);
|
||||||
|
countReadsInvoker.setWalker(countReadsWalker);
|
||||||
|
for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(countReadsInvoker);
|
||||||
|
|
||||||
|
GATKWalkerInvoker countBasesInReadInvoker = new GATKWalkerInvoker(this);
|
||||||
|
CountBasesInReadPerformanceWalker countBasesInReadWalker = new CountBasesInReadPerformanceWalker(countBasesInReadInvoker);
|
||||||
|
countBasesInReadInvoker.setWalker(countBasesInReadWalker);
|
||||||
|
for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(countBasesInReadInvoker);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Required main method implementation.
|
||||||
|
* @param argv Command-line argument text.
|
||||||
|
* @throws Exception on error.
|
||||||
|
*/
|
||||||
|
public static void main(String[] argv) throws Exception {
|
||||||
|
int returnCode = 0;
|
||||||
|
try {
|
||||||
|
BAMProcessingPerformanceMeter instance = new BAMProcessingPerformanceMeter();
|
||||||
|
start(instance, argv);
|
||||||
|
returnCode = 0;
|
||||||
|
}
|
||||||
|
catch(Exception ex) {
|
||||||
|
returnCode = 1;
|
||||||
|
ex.printStackTrace();
|
||||||
|
throw ex;
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
System.exit(returnCode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
abstract class ReadProcessor {
|
||||||
|
private final SimpleTimer timer;
|
||||||
|
private final int printFrequency;
|
||||||
|
protected int iterations = 0;
|
||||||
|
|
||||||
|
public ReadProcessor(BAMProcessingPerformanceMeter performanceMeter) {
|
||||||
|
timer = new SimpleTimer("timer");
|
||||||
|
this.printFrequency = performanceMeter.printFrequency;
|
||||||
|
}
|
||||||
|
|
||||||
|
public abstract String getTestName();
|
||||||
|
public String getIterationType() { return "loci"; }
|
||||||
|
|
||||||
|
public void processRead(final SAMRecord read) { }
|
||||||
|
public void execute(File bamFile,File fastaFile) {
|
||||||
|
SAMFileReader reader = new SAMFileReader(bamFile);
|
||||||
|
startTest();
|
||||||
|
for(SAMRecord read: reader) {
|
||||||
|
processRead(read);
|
||||||
|
updateIterationCount();
|
||||||
|
}
|
||||||
|
stopTest();
|
||||||
|
reader.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void startTest() {
|
||||||
|
timer.start();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void stopTest() {
|
||||||
|
timer.stop();
|
||||||
|
printStatus("TEST COMPLETE");
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void updateIterationCount() {
|
||||||
|
if(++iterations % printFrequency == 0) printStatus("ONGOING");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void printStatus(String prefix) {
|
||||||
|
System.out.printf("%s: %s printed %d %s in %f seconds.%n",prefix,getTestName(),iterations,getIterationType(),timer.getElapsedTime());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,67 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: mhanna
|
||||||
|
* Date: Feb 25, 2011
|
||||||
|
* Time: 10:16:55 AM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
class CountBasesInReadPerformanceWalker extends ReadWalker<Integer,Long> {
|
||||||
|
private long As;
|
||||||
|
private long Cs;
|
||||||
|
private long Gs;
|
||||||
|
private long Ts;
|
||||||
|
|
||||||
|
private final GATKWalkerInvoker invoker;
|
||||||
|
|
||||||
|
public CountBasesInReadPerformanceWalker(GATKWalkerInvoker walkerInvoker) {
|
||||||
|
this.invoker = walkerInvoker;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
|
||||||
|
for(byte base: read.getReadBases()) {
|
||||||
|
switch(base) {
|
||||||
|
case 'A': As++; break;
|
||||||
|
case 'C': Cs++; break;
|
||||||
|
case 'G': Gs++; break;
|
||||||
|
case 'T': Ts++; break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
invoker.updateIterationCount();
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Long reduceInit() { return 0L; }
|
||||||
|
public Long reduce(Integer value, Long accum) { return value + accum; }
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,54 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: mhanna
|
||||||
|
* Date: Feb 25, 2011
|
||||||
|
* Time: 10:16:55 AM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
class CountReadsPerformanceWalker extends ReadWalker<Integer,Long> {
|
||||||
|
private final GATKWalkerInvoker invoker;
|
||||||
|
|
||||||
|
public CountReadsPerformanceWalker(GATKWalkerInvoker walkerInvoker) {
|
||||||
|
this.invoker = walkerInvoker;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
|
||||||
|
invoker.updateIterationCount();
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Long reduceInit() { return 0L; }
|
||||||
|
public Long reduce(Integer value, Long accum) { return value + accum; }
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,49 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: mhanna
|
||||||
|
* Date: Feb 25, 2011
|
||||||
|
* Time: 10:16:53 AM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
class ExtractTag extends ReadProcessor {
|
||||||
|
private final String tag;
|
||||||
|
|
||||||
|
public ExtractTag(final BAMProcessingPerformanceMeter performanceMeter, final String tag) {
|
||||||
|
super(performanceMeter);
|
||||||
|
this.tag = tag;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getTestName() { return "extract tag"; }
|
||||||
|
public void processRead(final SAMRecord read) {
|
||||||
|
read.getAttribute(tag);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,85 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||||
|
|
||||||
|
import net.sf.picard.filter.SamRecordFilter;
|
||||||
|
import org.broadinstitute.sting.commandline.Tags;
|
||||||
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: mhanna
|
||||||
|
* Date: Feb 25, 2011
|
||||||
|
* Time: 10:16:54 AM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
class GATKWalkerInvoker extends ReadProcessor {
|
||||||
|
/**
|
||||||
|
* Walker to run over the existing dataset.
|
||||||
|
*/
|
||||||
|
private Walker<?,?> walker;
|
||||||
|
|
||||||
|
public GATKWalkerInvoker(BAMProcessingPerformanceMeter performanceMeter) {
|
||||||
|
super(performanceMeter);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getTestName() { return "GATK-CountReads"; }
|
||||||
|
|
||||||
|
public void setWalker(Walker<?,?> walker) {
|
||||||
|
this.walker = walker;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void execute(File samFile, File fastaFile) {
|
||||||
|
GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
||||||
|
|
||||||
|
// Establish the argument collection
|
||||||
|
GATKArgumentCollection argCollection = new GATKArgumentCollection();
|
||||||
|
argCollection.referenceFile = fastaFile;
|
||||||
|
argCollection.samFiles = Collections.singletonList(samFile.getAbsolutePath());
|
||||||
|
|
||||||
|
engine.setArguments(argCollection);
|
||||||
|
// Bugs in the engine mean that this has to be set twice.
|
||||||
|
engine.setSAMFileIDs(Collections.singletonList(new SAMReaderID(samFile,new Tags())));
|
||||||
|
engine.setFilters(Collections.<SamRecordFilter>emptyList());
|
||||||
|
engine.setReferenceMetaDataFiles(Collections.<RMDTriplet>emptyList());
|
||||||
|
|
||||||
|
// Create the walker
|
||||||
|
engine.setWalker(walker);
|
||||||
|
|
||||||
|
startTest();
|
||||||
|
engine.execute();
|
||||||
|
stopTest();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,106 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||||
|
|
||||||
|
import net.sf.picard.filter.FilteringIterator;
|
||||||
|
import net.sf.picard.filter.SamRecordFilter;
|
||||||
|
import net.sf.samtools.SAMFileReader;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.commandline.Tags;
|
||||||
|
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
||||||
|
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||||
|
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
|
||||||
|
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: mhanna
|
||||||
|
* Date: Feb 25, 2011
|
||||||
|
* Time: 10:16:54 AM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
class InvokeLocusIteratorByState extends ReadProcessor {
|
||||||
|
private final DownsamplingMethod downsamplingMethod;
|
||||||
|
|
||||||
|
public InvokeLocusIteratorByState(final BAMProcessingPerformanceMeter performanceMeter,DownsamplingMethod downsamplingMethod) {
|
||||||
|
super(performanceMeter);
|
||||||
|
this.downsamplingMethod = downsamplingMethod;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getTestName() {
|
||||||
|
if(downsamplingMethod != DownsamplingMethod.NONE)
|
||||||
|
return String.format("invoke locus iterator by state; downsampling by sample to coverage = %d; ",downsamplingMethod.toCoverage);
|
||||||
|
else
|
||||||
|
return String.format("invoke locus iterator by state; no downsampling; ");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getIterationType() { return "loci"; }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void execute(File samFile, File fastaFile) {
|
||||||
|
SAMFileReader reader = new SAMFileReader(samFile);
|
||||||
|
ReadProperties readProperties = new ReadProperties(Collections.<SAMReaderID>singletonList(new SAMReaderID(samFile,new Tags())),
|
||||||
|
reader.getFileHeader(),
|
||||||
|
false,
|
||||||
|
SAMFileReader.ValidationStringency.SILENT,
|
||||||
|
0,
|
||||||
|
downsamplingMethod,
|
||||||
|
new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)),
|
||||||
|
Collections.<SamRecordFilter>emptyList(),
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
BAQ.CalculationMode.OFF,
|
||||||
|
BAQ.QualityMode.DONT_MODIFY,
|
||||||
|
null,
|
||||||
|
(byte)0);
|
||||||
|
|
||||||
|
GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
|
||||||
|
SampleDataSource sampleDataSource = new SampleDataSource();
|
||||||
|
sampleDataSource.addSamplesFromSAMHeader(reader.getFileHeader());
|
||||||
|
|
||||||
|
// Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?
|
||||||
|
Iterator<SAMRecord> readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
|
||||||
|
LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser,sampleDataSource);
|
||||||
|
startTest();
|
||||||
|
while(locusIteratorByState.hasNext()) {
|
||||||
|
locusIteratorByState.next();
|
||||||
|
updateIterationCount();
|
||||||
|
}
|
||||||
|
stopTest();
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,70 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||||
|
|
||||||
|
import net.sf.picard.util.SamLocusIterator;
|
||||||
|
import net.sf.samtools.SAMFileReader;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: mhanna
|
||||||
|
* Date: Feb 25, 2011
|
||||||
|
* Time: 10:16:54 AM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
class InvokeSamLocusIterator extends ReadProcessor {
|
||||||
|
public InvokeSamLocusIterator(final BAMProcessingPerformanceMeter performanceMeter) {
|
||||||
|
super(performanceMeter);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getTestName() {
|
||||||
|
return String.format("invoke sam locus iterator");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getIterationType() { return "loci"; }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void execute(File samFile, File fastaFile) {
|
||||||
|
SAMFileReader reader = new SAMFileReader(samFile);
|
||||||
|
|
||||||
|
SamLocusIterator samLocusIterator = new SamLocusIterator(reader);
|
||||||
|
samLocusIterator.setEmitUncoveredLoci(false);
|
||||||
|
Iterator<SamLocusIterator.LocusInfo> workhorseIterator = samLocusIterator.iterator();
|
||||||
|
|
||||||
|
startTest();
|
||||||
|
while(workhorseIterator.hasNext()) {
|
||||||
|
SamLocusIterator.LocusInfo locusInfo = workhorseIterator.next();
|
||||||
|
updateIterationCount();
|
||||||
|
}
|
||||||
|
stopTest();
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,65 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||||
|
|
||||||
|
import net.sf.samtools.Cigar;
|
||||||
|
import net.sf.samtools.CigarElement;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: mhanna
|
||||||
|
* Date: Feb 25, 2011
|
||||||
|
* Time: 10:16:53 AM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
class IterateOverCigarString extends ReadProcessor {
|
||||||
|
private long matchMismatches;
|
||||||
|
private long insertions;
|
||||||
|
private long deletions;
|
||||||
|
private long others;
|
||||||
|
|
||||||
|
public IterateOverCigarString(final BAMProcessingPerformanceMeter performanceMeter) {
|
||||||
|
super(performanceMeter);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getTestName() { return "iterator over cigar string"; }
|
||||||
|
public void processRead(final SAMRecord read) {
|
||||||
|
Cigar cigar = read.getCigar();
|
||||||
|
for(CigarElement cigarElement: cigar.getCigarElements()) {
|
||||||
|
int elementSize = cigarElement.getLength();
|
||||||
|
while(elementSize > 0) {
|
||||||
|
switch(cigarElement.getOperator()) {
|
||||||
|
case M: matchMismatches++; break;
|
||||||
|
case I: insertions++; break;
|
||||||
|
case D: deletions++; break;
|
||||||
|
default: others++; break;
|
||||||
|
}
|
||||||
|
elementSize--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,58 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: mhanna
|
||||||
|
* Date: Feb 25, 2011
|
||||||
|
* Time: 10:16:53 AM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
class IterateOverEachBase extends ReadProcessor {
|
||||||
|
private long As;
|
||||||
|
private long Cs;
|
||||||
|
private long Gs;
|
||||||
|
private long Ts;
|
||||||
|
|
||||||
|
public IterateOverEachBase(final BAMProcessingPerformanceMeter performanceMeter) {
|
||||||
|
super(performanceMeter);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getTestName() { return "iterate over each base"; }
|
||||||
|
public void processRead(final SAMRecord read) {
|
||||||
|
for(byte base: read.getReadBases()) {
|
||||||
|
switch(base) {
|
||||||
|
case 'A': As++; break;
|
||||||
|
case 'C': Cs++; break;
|
||||||
|
case 'G': Gs++; break;
|
||||||
|
case 'T': Ts++; break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,44 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: mhanna
|
||||||
|
* Date: Feb 25, 2011
|
||||||
|
* Time: 10:16:53 AM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
class NoAdditionalProcessing extends ReadProcessor {
|
||||||
|
public NoAdditionalProcessing(final BAMProcessingPerformanceMeter performanceMeter) {
|
||||||
|
super(performanceMeter);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getTestName() { return "no additional processing"; }
|
||||||
|
public void processRead(final SAMRecord read) {}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,88 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.reads.utilities;
|
||||||
|
|
||||||
|
import net.sf.samtools.util.BlockGunzipper;
|
||||||
|
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||||
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.lang.reflect.InvocationTargetException;
|
||||||
|
import java.lang.reflect.Method;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test decompression of a single BGZF block.
|
||||||
|
*/
|
||||||
|
public class UnzipSingleBlock extends CommandLineProgram {
|
||||||
|
@Input(fullName = "block_file", shortName = "b", doc = "block file over which to test unzipping", required = true)
|
||||||
|
private File blockFile;
|
||||||
|
|
||||||
|
@Input(fullName = "compressed_block_size", shortName = "cbs", doc = "size of compressed block", required = true)
|
||||||
|
private int compressedBufferSize;
|
||||||
|
|
||||||
|
public int execute() throws IOException, NoSuchMethodException, IllegalAccessException, InvocationTargetException {
|
||||||
|
byte[] compressedBuffer = new byte[(int)blockFile.length()];
|
||||||
|
byte[] uncompressedBuffer = new byte[65536];
|
||||||
|
|
||||||
|
FileInputStream fis = new FileInputStream(blockFile);
|
||||||
|
fis.read(compressedBuffer);
|
||||||
|
fis.close();
|
||||||
|
|
||||||
|
BlockGunzipper gunzipper = new BlockGunzipper();
|
||||||
|
gunzipper.setCheckCrcs(true);
|
||||||
|
Method unzipBlock = BlockGunzipper.class.getDeclaredMethod("unzipBlock",byte[].class,byte[].class,Integer.TYPE);
|
||||||
|
unzipBlock.setAccessible(true);
|
||||||
|
|
||||||
|
unzipBlock.invoke(gunzipper,uncompressedBuffer,compressedBuffer,compressedBufferSize);
|
||||||
|
|
||||||
|
System.out.printf("SUCCESS!%n");
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Required main method implementation.
|
||||||
|
* @param argv Command-line argument text.
|
||||||
|
* @throws Exception on error.
|
||||||
|
*/
|
||||||
|
public static void main(String[] argv) throws Exception {
|
||||||
|
int returnCode = 0;
|
||||||
|
try {
|
||||||
|
UnzipSingleBlock instance = new UnzipSingleBlock();
|
||||||
|
start(instance, argv);
|
||||||
|
returnCode = 0;
|
||||||
|
}
|
||||||
|
catch(Exception ex) {
|
||||||
|
returnCode = 1;
|
||||||
|
ex.printStackTrace();
|
||||||
|
throw ex;
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
System.exit(returnCode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -88,7 +88,7 @@ public class GATKBAMIndexUnitTest extends BaseTest {
|
||||||
|
|
||||||
// Level 5
|
// Level 5
|
||||||
Assert.assertEquals(GATKBAMIndex.getFirstBinInLevel(5),4681);
|
Assert.assertEquals(GATKBAMIndex.getFirstBinInLevel(5),4681);
|
||||||
Assert.assertEquals(bamIndex.getLevelSize(5),37449-4681+1);
|
Assert.assertEquals(bamIndex.getLevelSize(5),37448-4681+1);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue