Cleanup of int-packed file readers / writers. All primitive writers for BWTs and SAs are in place; time to move on to compound reader / writers.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1571 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-09-09 20:36:39 +00:00
parent d9f3e9493f
commit bc9fe31cf5
6 changed files with 187 additions and 78 deletions

View File

@ -0,0 +1,68 @@
package org.broadinstitute.sting.bwa;
import org.broadinstitute.sting.utils.StingException;
import java.io.*;
import java.nio.ByteOrder;
import java.nio.ByteBuffer;
/**
 * Reads bases that were packed into fixed-size words from an underlying input stream.
 * Only bases packed into 32-bit words ({@code Integer.class}) are currently accepted.
 *
 * @author mhanna
 * @version 0.1
 */
public class BasePackedInputStream<T> {
    /**
     * Type of object to unpack.
     */
    private final Class<T> type;

    /**
     * Ultimate source for packed bases.
     */
    private final InputStream targetInputStream;

    /**
     * A fixed-size buffer holding exactly one packed word, in the byte order given at construction.
     */
    private final ByteBuffer buffer;

    /**
     * Opens the given file through a buffered stream and reads packed bases from it.
     * @param type type of word the bases are packed into; must be {@code Integer.class}.
     * @param inputFile file containing the packed bases.
     * @param byteOrder byte order of the packed words in the file.
     * @throws FileNotFoundException if the file cannot be opened for reading.
     */
    public BasePackedInputStream( Class<T> type, File inputFile, ByteOrder byteOrder ) throws FileNotFoundException {
        this(type,new BufferedInputStream(new FileInputStream(inputFile)),byteOrder);
    }

    /**
     * Reads packed bases from an already-open stream.
     * @param type type of word the bases are packed into; must be {@code Integer.class}.
     * @param inputStream stream from which packed words are read.
     * @param byteOrder byte order of the packed words in the stream.
     * @throws FileNotFoundException never thrown by this constructor itself; retained in the
     *         signature so existing callers keep compiling.
     * @throws StingException if {@code type} is anything other than {@code Integer.class}.
     */
    public BasePackedInputStream( Class<T> type, InputStream inputStream, ByteOrder byteOrder ) throws FileNotFoundException {
        if( type != Integer.class )
            throw new StingException("Only bases packed into 32-bit words are currently supported by this input stream. Type specified: " + type.getName());
        this.targetInputStream = inputStream;
        this.type = type;
        this.buffer = ByteBuffer.allocate(PackUtils.bitsInType(type)/PackUtils.BITS_PER_BYTE).order(byteOrder);
    }

    /**
     * Reads the given number of bases from the stream. Each call starts on a fresh packed
     * word; the first base of a word is taken from its most significant bits.
     * @param length number of bases to read from the stream.
     * @return the unpacked bases, one per array element.
     * @throws EOFException if the stream ends before a needed packed word is completely read.
     * @throws IOException if an I/O error occurs.
     */
    public byte[] read( int length ) throws IOException {
        byte[] bwt = new byte[length];
        int packedWord = 0;
        final int basesPerEntry = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BASE;
        for( int i = 0; i < length; i++ ) {
            // Fetch the next word only when the previous one has been fully consumed.
            if( i % basesPerEntry == 0 )
                packedWord = readPackedWord();
            // Bases are stored high-to-low within the word.
            int position = basesPerEntry - i % basesPerEntry - 1;
            bwt[i] = PackUtils.unpackBase((byte)((packedWord >> position*PackUtils.BITS_PER_BASE) & 0x3));
        }
        return bwt;
    }

    /**
     * Closes the underlying input stream.
     * @throws IOException if an I/O error occurs.
     */
    public void close() throws IOException {
        targetInputStream.close();
    }

    /**
     * Fills the word buffer completely from the stream and decodes it as an int.
     * InputStream.read may legally return fewer bytes than requested, so keep reading
     * until the whole word has arrived rather than decoding a partially filled buffer.
     * @return the next packed word.
     * @throws EOFException if the stream ends before the word is complete.
     * @throws IOException if an I/O error occurs.
     */
    private int readPackedWord() throws IOException {
        final byte[] wordBytes = buffer.array();
        int filled = 0;
        while( filled < wordBytes.length ) {
            int count = targetInputStream.read(wordBytes,filled,wordBytes.length-filled);
            if( count < 0 )
                throw new EOFException("Premature end of stream: expected " + wordBytes.length + " bytes for a packed word but found only " + filled);
            filled += count;
        }
        buffer.rewind();
        return buffer.getInt();
    }
}

View File

@ -50,6 +50,15 @@ public class BasePackedOutputStream<T> {
this.buffer = ByteBuffer.allocate(basesPerType/PackUtils.ALPHABET_SIZE).order(byteOrder);
}
/**
 * Writes a single base to the output stream by wrapping it in a one-element array and
 * delegating to the array-based write.
 * NOTE(review): the original comment states that only this base is written and no packing
 * is performed; confirm against the behavior of write(byte[]).
 * @param base The base to write; it is narrowed to a byte before being passed on.
 * @throws IOException if an I/O error occurs.
 */
public void write( int base ) throws IOException {
    final byte[] singleBase = { (byte)base };
    write( singleBase );
}
/**
* Writes an array of bases to the target output stream.
* @param bases List of bases to write.

View File

@ -177,7 +177,7 @@ public class CreateBWTFromReference {
bwtOutputStream.write(buffer.array());
bwtOutputStream.flush();
PackedIntOutputStream occurrenceWriter = new PackedIntOutputStream(bwtOutputStream);
IntPackedOutputStream occurrenceWriter = new IntPackedOutputStream(bwtOutputStream,ByteOrder.LITTLE_ENDIAN);
occurrenceWriter.write(occurrences);
occurrenceWriter.flush();
@ -186,7 +186,7 @@ public class CreateBWTFromReference {
sequenceOutputStream.close();
OutputStream saOutputStream = new BufferedOutputStream(new FileOutputStream(saFile));
PackedIntOutputStream saIntWriter = new PackedIntOutputStream(saOutputStream);
IntPackedOutputStream saIntWriter = new IntPackedOutputStream(saOutputStream,ByteOrder.LITTLE_ENDIAN);
// SA file format is 'primary' (= SA-1[0]?), occurrence array, interval, sequence length, SA[]
saIntWriter.write(inverseSuffixArray[0]);
@ -198,8 +198,17 @@ public class CreateBWTFromReference {
saIntWriter.close();
File existingBwtFile = new File(inputFileName+".bwt");
WordPackedInputStream inputStream = new WordPackedInputStream(existingBwtFile,ByteOrder.LITTLE_ENDIAN);
byte[] existingBwt = inputStream.read();
InputStream existingBwtStream = new BufferedInputStream(new FileInputStream(existingBwtFile));
IntPackedInputStream existingIntReader = new IntPackedInputStream(existingBwtStream,ByteOrder.LITTLE_ENDIAN);
int existingFirstInverseSA = existingIntReader.read();
int[] existingOccurrences = new int[4];
existingIntReader.read(existingOccurrences);
BasePackedInputStream inputStream = new BasePackedInputStream<Integer>(Integer.class,existingBwtStream,ByteOrder.LITTLE_ENDIAN);
byte[] existingBwt = inputStream.read(existingOccurrences[3]);
String existingBwtAsString = new String(existingBwt);
System.out.printf("Existing BWT: %s...%n",existingBwtAsString.substring(0,80));

View File

@ -0,0 +1,76 @@
package org.broadinstitute.sting.bwa;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
/**
 * Reads a sequence of 32-bit integers, stored in a caller-specified byte order,
 * from an underlying input stream.
 *
 * @author mhanna
 * @version 0.1
 */
public class IntPackedInputStream {
    /**
     * Ultimate source of the integer data.
     */
    private final InputStream targetInputStream;

    /**
     * A fixed-size buffer holding exactly one integer, in the byte order given at construction.
     */
    private final ByteBuffer buffer;

    /**
     * Create a new IntPackedInputStream, reading from the given file.
     * @param inputFile file from which to read ints.
     * @param byteOrder Endianness to use when reading a list of integers.
     * @throws java.io.IOException if an I/O error occurs.
     */
    public IntPackedInputStream(File inputFile, ByteOrder byteOrder) throws IOException {
        this(new FileInputStream(inputFile),byteOrder);
    }

    /**
     * Read ints from the given InputStream.
     * @param inputStream Input stream from which to read ints.
     * @param byteOrder Endianness to use when reading a list of integers.
     * @throws IOException if an I/O error occurs.
     */
    public IntPackedInputStream(InputStream inputStream, ByteOrder byteOrder) throws IOException {
        this.targetInputStream = inputStream;
        this.buffer = ByteBuffer.allocate(PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE).order(byteOrder);
    }

    /**
     * Read a datum from the input stream.
     * @return The next input datum in the stream.
     * @throws EOFException if the stream ends before a complete integer is read.
     * @throws IOException if an I/O error occurs.
     */
    public int read() throws IOException {
        int[] data = new int[1];
        read(data);
        return data[0];
    }

    /**
     * Fill the given array with consecutive integers from the input stream.
     * @param data placeholder for input data; every element is overwritten.
     * @throws EOFException if the stream ends before the array has been filled.
     * @throws IOException if an I/O error occurs.
     */
    public void read( int[] data ) throws IOException {
        for(int i = 0; i < data.length; i++)
            data[i] = readWord();
    }

    /**
     * Closes the underlying input stream.
     * @throws IOException if an I/O error occurs.
     */
    public void close() throws IOException {
        targetInputStream.close();
    }

    /**
     * Fills the buffer completely from the stream and decodes it as an int.
     * InputStream.read may legally return fewer bytes than requested, so keep reading
     * until the whole integer has arrived rather than decoding a partially filled buffer.
     * @return the next integer in the stream.
     * @throws EOFException if the stream ends before the integer is complete.
     * @throws IOException if an I/O error occurs.
     */
    private int readWord() throws IOException {
        final byte[] wordBytes = buffer.array();
        int filled = 0;
        while( filled < wordBytes.length ) {
            int count = targetInputStream.read(wordBytes,filled,wordBytes.length-filled);
            if( count < 0 )
                throw new EOFException("Premature end of stream: expected " + wordBytes.length + " bytes for an integer but found only " + filled);
            filled += count;
        }
        buffer.rewind();
        return buffer.getInt();
    }
}

View File

@ -25,45 +25,46 @@
package org.broadinstitute.sting.bwa;
import org.broadinstitute.sting.utils.StingException;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
/**
* Writes an occurrence array to the output file.
* Writes a list of integers to the output file.
*
* @author mhanna
* @version 0.1
*/
public class PackedIntOutputStream {
/**
* How many bytes does it take to hold an integer in Java?
*/
private static final int INT_SIZE_IN_BYTES = 4;
public class IntPackedOutputStream {
/**
* Ultimate target for the occurrence array.
*/
private final OutputStream targetOutputStream;
/**
* A fixed-size buffer for int-packed data.
*/
private final ByteBuffer buffer;
/**
* Create a new IntPackedOutputStream, writing to the given target file.
* @param outputFile target output file.
* @param byteOrder Endianness to use when writing a list of integers.
* @throws IOException if an I/O error occurs.
*/
public PackedIntOutputStream( File outputFile ) throws IOException {
this(new FileOutputStream(outputFile));
public IntPackedOutputStream(File outputFile, ByteOrder byteOrder) throws IOException {
this(new FileOutputStream(outputFile),byteOrder);
}
/**
* Write packed ints to the given OutputStream.
* @param outputStream Output stream to which to write packed ints.
* @param byteOrder Endianness to use when writing a list of integers.
* @throws IOException if an I/O error occurs.
*/
public PackedIntOutputStream( OutputStream outputStream ) throws IOException {
public IntPackedOutputStream(OutputStream outputStream, ByteOrder byteOrder) throws IOException {
this.targetOutputStream = outputStream;
buffer = ByteBuffer.allocate(PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE).order(byteOrder);
}
/**
@ -72,7 +73,7 @@ public class PackedIntOutputStream {
* @throws IOException if an I/O error occurs.
*/
public void write( int datum ) throws IOException {
ByteBuffer buffer = ByteBuffer.allocate(INT_SIZE_IN_BYTES).order(ByteOrder.LITTLE_ENDIAN);
buffer.rewind();
buffer.putInt(datum);
targetOutputStream.write(buffer.array());
}
@ -87,6 +88,13 @@ public class PackedIntOutputStream {
write(datum);
}
/**
* Write the given chunk of data to the output stream.
* @param data data to write.
* @param offset position at which to start.
* @param length number of ints to write.
* @throws IOException if an I/O error occurs.
*/
public void write( int[] data, int offset, int length ) throws IOException {
for( int i = offset; i < offset+length; i++ )
write(data[i]);

View File

@ -1,61 +0,0 @@
package org.broadinstitute.sting.bwa;
import java.io.*;
import java.nio.ByteOrder;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.ArrayList;
/**
 * Reads a word-packed version of the input stream: five header words are skipped,
 * then every remaining 32-bit word is unpacked into bases.
 *
 * @author mhanna
 * @version 0.1
 */
public class WordPackedInputStream {
    /**
     * Ultimate source for packed bases.
     */
    private final InputStream targetInputStream;

    /**
     * A fixed-size buffer holding exactly one packed word, in the byte order given at construction.
     */
    private final ByteBuffer buffer;

    /**
     * Opens the given file through a buffered stream.
     * @param inputFile file containing the header words followed by packed bases.
     * @param byteOrder byte order of the words in the file.
     * @throws FileNotFoundException if the file cannot be opened for reading.
     */
    public WordPackedInputStream( File inputFile, ByteOrder byteOrder ) throws FileNotFoundException {
        this.targetInputStream = new BufferedInputStream(new FileInputStream(inputFile));
        this.buffer = ByteBuffer.allocate(PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE).order(byteOrder);
    }

    /**
     * Read the entire contents of the input stream.
     * @return every base found after the header, in stream order.
     * @throws EOFException if the stream ends inside the header or in the middle of a word.
     * @throws IOException if an I/O error occurs.
     */
    public byte[] read() throws IOException {
        // Skip over header info.
        for( int i = 0; i < 5; i++ ) {
            if( !fillBuffer() )
                throw new EOFException("Premature end of stream while skipping header word " + i);
            System.out.println("Skipping over: " + buffer.getInt());
        }

        // A growable byte sink avoids boxing every base into a Byte.
        ByteArrayOutputStream bwt = new ByteArrayOutputStream();
        final int basesPerWord = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BASE;
        while( fillBuffer() ) {
            int packedWord = buffer.getInt();
            // Bases are stored high-to-low within the word, two bits each (hence the 0x3 mask).
            for( int i = basesPerWord - 1; i >= 0; i-- ) {
                byte packedByte = (byte)((packedWord >> i*2) & 0x3);
                bwt.write(PackUtils.unpackBase(packedByte));
            }
        }
        return bwt.toByteArray();
    }

    /**
     * Fills the word buffer completely from the stream and rewinds it for decoding.
     * InputStream.read may legally return fewer bytes than requested, so keep reading
     * until the whole word has arrived.
     * @return true if a full word was read; false if the stream was already at its end.
     * @throws EOFException if the stream ends part-way through a word.
     * @throws IOException if an I/O error occurs.
     */
    private boolean fillBuffer() throws IOException {
        final byte[] wordBytes = buffer.array();
        int filled = 0;
        while( filled < wordBytes.length ) {
            int count = targetInputStream.read(wordBytes,filled,wordBytes.length-filled);
            if( count < 0 ) {
                if( filled == 0 )
                    return false;
                throw new EOFException("Premature end of stream: expected " + wordBytes.length + " bytes for a packed word but found only " + filled);
            }
            filled += count;
        }
        buffer.rewind();
        return true;
    }
}