diff --git a/java/src/org/broadinstitute/sting/bwa/BasePackedInputStream.java b/java/src/org/broadinstitute/sting/bwa/BasePackedInputStream.java new file mode 100644 index 000000000..fe47af206 --- /dev/null +++ b/java/src/org/broadinstitute/sting/bwa/BasePackedInputStream.java @@ -0,0 +1,68 @@ +package org.broadinstitute.sting.bwa; + +import org.broadinstitute.sting.utils.StingException; + +import java.io.*; +import java.nio.ByteOrder; +import java.nio.ByteBuffer; + +/** + * Reads bases that have been packed into 32-bit words from an input stream. + * + * @author mhanna + * @version 0.1 + */ +public class BasePackedInputStream { + /** + * Type of the word from which bases are unpacked; the constructor accepts only Integer.class. + */ + private final Class type; + + /** + * Ultimate source for packed bases. + */ + private final InputStream targetInputStream; + + /** + * A fixed-size buffer for word-packed data, sized for one word of the given type and set to the byte order passed to the constructor. + */ + private final ByteBuffer buffer; + + public BasePackedInputStream( Class type, File inputFile, ByteOrder byteOrder ) throws FileNotFoundException { + this(type,new BufferedInputStream(new FileInputStream(inputFile)),byteOrder); + } + + public BasePackedInputStream( Class type, InputStream inputStream, ByteOrder byteOrder ) throws FileNotFoundException { + if( type != Integer.class ) + throw new StingException("Only bases packed into 32-bit words are currently supported by this input stream. Type specified: " + type.getName()); + + this.targetInputStream = inputStream; + this.type = type; + this.buffer = ByteBuffer.allocate(PackUtils.bitsInType(type)/PackUtils.BITS_PER_BYTE).order(byteOrder); + } + + /** + * Read the given number of bases from the input stream and return them as a byte array with one element per base. A packed word is fetched from the stream at the start of the call and again each time all bases of the previous word have been consumed; bases are taken from the most significant bits of each word first. NOTE(review): the byte count returned by InputStream.read is not checked, so a short read or end of stream goes undetected. + * @param length number of bases to read from the stream. + * @throws IOException if an I/O error occurs. 
+ */ + public byte[] read( int length ) throws IOException { + byte[] bwt = new byte[length]; + int packedWord = 0; + + final int basesPerEntry = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BASE; + for( int i = 0; i < length; i++ ) { + if( i % basesPerEntry == 0 ) { + buffer.rewind(); + targetInputStream.read(buffer.array()); + packedWord = buffer.getInt(); + } + + int position = basesPerEntry - i % basesPerEntry - 1; + bwt[i] = PackUtils.unpackBase((byte)((packedWord >> position*PackUtils.BITS_PER_BASE) & 0x3)); + } + + return bwt; + } + +} diff --git a/java/src/org/broadinstitute/sting/bwa/BasePackedOutputStream.java b/java/src/org/broadinstitute/sting/bwa/BasePackedOutputStream.java index 16f9947a3..a84cd6c00 100644 --- a/java/src/org/broadinstitute/sting/bwa/BasePackedOutputStream.java +++ b/java/src/org/broadinstitute/sting/bwa/BasePackedOutputStream.java @@ -50,6 +50,15 @@ public class BasePackedOutputStream { this.buffer = ByteBuffer.allocate(basesPerType/PackUtils.ALPHABET_SIZE).order(byteOrder); } + /** + * Writes the given base to the output stream. Will write only this base; no packing will be performed. + * @param base The base to write; it is cast to a byte and handed to the array-based write as a one-element array. + * @throws IOException if an I/O error occurs. + */ + public void write( int base ) throws IOException { + write( new byte[] { (byte)base } ); + } + /** * Writes an array of bases to the target output stream. * @param bases List of bases to write. 
diff --git a/java/src/org/broadinstitute/sting/bwa/CreateBWTFromReference.java b/java/src/org/broadinstitute/sting/bwa/CreateBWTFromReference.java index cc7173236..adc3442e1 100755 --- a/java/src/org/broadinstitute/sting/bwa/CreateBWTFromReference.java +++ b/java/src/org/broadinstitute/sting/bwa/CreateBWTFromReference.java @@ -177,7 +177,7 @@ public class CreateBWTFromReference { bwtOutputStream.write(buffer.array()); bwtOutputStream.flush(); - PackedIntOutputStream occurrenceWriter = new PackedIntOutputStream(bwtOutputStream); + IntPackedOutputStream occurrenceWriter = new IntPackedOutputStream(bwtOutputStream,ByteOrder.LITTLE_ENDIAN); occurrenceWriter.write(occurrences); occurrenceWriter.flush(); @@ -186,7 +186,7 @@ public class CreateBWTFromReference { sequenceOutputStream.close(); OutputStream saOutputStream = new BufferedOutputStream(new FileOutputStream(saFile)); - PackedIntOutputStream saIntWriter = new PackedIntOutputStream(saOutputStream); + IntPackedOutputStream saIntWriter = new IntPackedOutputStream(saOutputStream,ByteOrder.LITTLE_ENDIAN); // SA file format is 'primary' (= SA-1[0]?), occurrence array, interval, sequence length, SA[] saIntWriter.write(inverseSuffixArray[0]); @@ -198,8 +198,17 @@ public class CreateBWTFromReference { saIntWriter.close(); File existingBwtFile = new File(inputFileName+".bwt"); - WordPackedInputStream inputStream = new WordPackedInputStream(existingBwtFile,ByteOrder.LITTLE_ENDIAN); - byte[] existingBwt = inputStream.read(); + InputStream existingBwtStream = new BufferedInputStream(new FileInputStream(existingBwtFile)); + + IntPackedInputStream existingIntReader = new IntPackedInputStream(existingBwtStream,ByteOrder.LITTLE_ENDIAN); + + int existingFirstInverseSA = existingIntReader.read(); + + int[] existingOccurrences = new int[4]; + existingIntReader.read(existingOccurrences); + + BasePackedInputStream inputStream = new BasePackedInputStream(Integer.class,existingBwtStream,ByteOrder.LITTLE_ENDIAN); + byte[] existingBwt = 
inputStream.read(existingOccurrences[3]); String existingBwtAsString = new String(existingBwt); System.out.printf("Existing BWT: %s...%n",existingBwtAsString.substring(0,80)); diff --git a/java/src/org/broadinstitute/sting/bwa/IntPackedInputStream.java b/java/src/org/broadinstitute/sting/bwa/IntPackedInputStream.java new file mode 100644 index 000000000..47e4ad71d --- /dev/null +++ b/java/src/org/broadinstitute/sting/bwa/IntPackedInputStream.java @@ -0,0 +1,76 @@ +package org.broadinstitute.sting.bwa; + +import java.io.*; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +/** + * Reads a sequence of integers from an input stream, decoding each one through a ByteBuffer in a caller-specified byte order. + * + * @author mhanna + * @version 0.1 + */ +public class IntPackedInputStream { + /** + * Ultimate source of the integer data. + */ + private final InputStream targetInputStream; + + /** + * A fixed-size buffer for one integer, configured with the byte order in which integer input data appears. + */ + private final ByteBuffer buffer; + + /** + * Create a new IntPackedInputStream, reading from the given input file. + * @param inputFile file from which to read ints. + * @param byteOrder Endianness to use when reading a list of integers. + * @throws java.io.IOException if an I/O error occurs. + */ + public IntPackedInputStream(File inputFile, ByteOrder byteOrder) throws IOException { + this(new FileInputStream(inputFile),byteOrder); + } + + /** + * Read ints from the given InputStream. + * @param inputStream Input stream from which to read ints. + * @param byteOrder Endianness to use when reading a list of integers. + * @throws IOException if an I/O error occurs. + */ + public IntPackedInputStream(InputStream inputStream, ByteOrder byteOrder) throws IOException { + this.targetInputStream = inputStream; + this.buffer = ByteBuffer.allocate(PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE).order(byteOrder); + } + + /** + * Read a datum from the input stream. + * @return The next input datum in the stream. + * @throws IOException if an I/O error occurs. 
+ */ + public int read() throws IOException { + int[] data = new int[1]; + read(data); + return data[0]; + } + + /** + * Fill the given array with ints read from the input stream, one per element, in index order. NOTE(review): the byte count returned by InputStream.read is not checked, so a short read or end of stream goes undetected. + * @param data array to fill; its length determines how many ints are read. + * @throws IOException if an I/O error occurs. + */ + public void read( int[] data ) throws IOException { + for(int i = 0; i < data.length; i++) { + targetInputStream.read(buffer.array()); + data[i] = buffer.getInt(); + buffer.rewind(); + } + } + + /** + * Closes the underlying input stream. + * @throws IOException if an I/O error occurs. + */ + public void close() throws IOException { + targetInputStream.close(); + } +} diff --git a/java/src/org/broadinstitute/sting/bwa/PackedIntOutputStream.java b/java/src/org/broadinstitute/sting/bwa/IntPackedOutputStream.java similarity index 75% rename from java/src/org/broadinstitute/sting/bwa/PackedIntOutputStream.java rename to java/src/org/broadinstitute/sting/bwa/IntPackedOutputStream.java index c530676b7..315976b10 100755 --- a/java/src/org/broadinstitute/sting/bwa/PackedIntOutputStream.java +++ b/java/src/org/broadinstitute/sting/bwa/IntPackedOutputStream.java @@ -25,45 +25,46 @@ package org.broadinstitute.sting.bwa; -import org.broadinstitute.sting.utils.StingException; - import java.io.*; import java.nio.ByteBuffer; import java.nio.ByteOrder; /** - * Writes an occurrence array to the output file. + * Writes a list of integers to the output file. * * @author mhanna * @version 0.1 */ -public class PackedIntOutputStream { - /** - * How many bytes does it take to hold an integer in Java? - */ - private static final int INT_SIZE_IN_BYTES = 4; - +public class IntPackedOutputStream { /** * Ultimate target for the occurrence array. */ private final OutputStream targetOutputStream; + /** + * A fixed-size buffer for int-packed data, rewound and reused for each int written. + */ + private final ByteBuffer buffer; + /** * Create a new PackedIntOutputStream, writing to the given target file. * @param outputFile target output file. 
+ * @param byteOrder Endianness to use when writing a list of integers. * @throws IOException if an I/O error occurs. */ - public PackedIntOutputStream( File outputFile ) throws IOException { - this(new FileOutputStream(outputFile)); + public IntPackedOutputStream(File outputFile, ByteOrder byteOrder) throws IOException { + this(new FileOutputStream(outputFile),byteOrder); } /** * Write packed ints to the given OutputStream. * @param outputStream Output stream to which to write packed ints. + * @param byteOrder Endianness to use when writing a list of integers. * @throws IOException if an I/O error occurs. */ - public PackedIntOutputStream( OutputStream outputStream ) throws IOException { + public IntPackedOutputStream(OutputStream outputStream, ByteOrder byteOrder) throws IOException { this.targetOutputStream = outputStream; + buffer = ByteBuffer.allocate(PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE).order(byteOrder); } /** @@ -72,7 +73,7 @@ public class PackedIntOutputStream { * @throws IOException if an I/O error occurs. */ public void write( int datum ) throws IOException { - ByteBuffer buffer = ByteBuffer.allocate(INT_SIZE_IN_BYTES).order(ByteOrder.LITTLE_ENDIAN); + buffer.rewind(); buffer.putInt(datum); targetOutputStream.write(buffer.array()); } @@ -87,6 +88,13 @@ public class PackedIntOutputStream { write(datum); } + /** + * Write the given chunk of data to the output stream. + * @param data data to write. + * @param offset index of the first int in data to write. + * @param length number of ints to write. + * @throws IOException if an I/O error occurs. 
+ */ public void write( int[] data, int offset, int length ) throws IOException { for( int i = offset; i < offset+length; i++ ) write(data[i]); diff --git a/java/src/org/broadinstitute/sting/bwa/WordPackedInputStream.java b/java/src/org/broadinstitute/sting/bwa/WordPackedInputStream.java deleted file mode 100644 index 0abe4bdda..000000000 --- a/java/src/org/broadinstitute/sting/bwa/WordPackedInputStream.java +++ /dev/null @@ -1,61 +0,0 @@ -package org.broadinstitute.sting.bwa; - -import java.io.*; -import java.nio.ByteOrder; -import java.nio.ByteBuffer; -import java.util.List; -import java.util.ArrayList; - -/** - * Reads a word-packed version of the input stream. - * - * @author mhanna - * @version 0.1 - */ -public class WordPackedInputStream { - - /** - * Ultimate source for packed bases. - */ - private final InputStream targetInputStream; - - /** - * A fixed-size buffer for word-packed data. - */ - private final ByteBuffer buffer; - - public WordPackedInputStream( File inputFile, ByteOrder byteOrder ) throws FileNotFoundException { - this.targetInputStream = new BufferedInputStream(new FileInputStream(inputFile)); - this.buffer = ByteBuffer.allocate(PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE).order(byteOrder); - } - - /** - * Read the entire contents of the input stream. - * @throws IOException if an I/O error occurs. - */ - public byte[] read() throws IOException { - // Skip over header info. 
- for( int i = 0; i < 5; i++ ) { - targetInputStream.read(buffer.array()); - System.out.println("Skipping over: " + buffer.getInt()); - buffer.rewind(); - } - - List bwtList = new ArrayList(); - while(targetInputStream.read(buffer.array()) > 0) { - int packedWord = buffer.getInt(); - for( int i = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BASE - 1; i >= 0; i-- ) { - byte packedByte = (byte)((packedWord >> i*2) & 0x3); - bwtList.add(PackUtils.unpackBase(packedByte)); - } - buffer.rewind(); - } - - byte[] bwt = new byte[bwtList.size()]; - for(int i = 0; i < bwtList.size(); i++) - bwt[i] = bwtList.get(i); - - return bwt; - } - -}