Cleanup of int-packed file readers / writers. All primitive writers for BWTs and SAs are in place; time to move on to compound reader / writers.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1571 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d9f3e9493f
commit
bc9fe31cf5
|
|
@ -0,0 +1,68 @@
|
|||
package org.broadinstitute.sting.bwa;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
/**
|
||||
* Reads a packed version of the input stream.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class BasePackedInputStream<T> {
|
||||
/**
|
||||
* Type of object to unpack.
|
||||
*/
|
||||
private final Class<T> type;
|
||||
|
||||
/**
|
||||
* Ultimate source for packed bases.
|
||||
*/
|
||||
private final InputStream targetInputStream;
|
||||
|
||||
/**
|
||||
* A fixed-size buffer for word-packed data.
|
||||
*/
|
||||
private final ByteBuffer buffer;
|
||||
|
||||
public BasePackedInputStream( Class<T> type, File inputFile, ByteOrder byteOrder ) throws FileNotFoundException {
|
||||
this(type,new BufferedInputStream(new FileInputStream(inputFile)),byteOrder);
|
||||
}
|
||||
|
||||
public BasePackedInputStream( Class<T> type, InputStream inputStream, ByteOrder byteOrder ) throws FileNotFoundException {
|
||||
if( type != Integer.class )
|
||||
throw new StingException("Only bases packed into 32-bit words are currently supported by this input stream. Type specified: " + type.getName());
|
||||
|
||||
this.targetInputStream = inputStream;
|
||||
this.type = type;
|
||||
this.buffer = ByteBuffer.allocate(PackUtils.bitsInType(type)/PackUtils.BITS_PER_BYTE).order(byteOrder);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the entire contents of the input stream.
|
||||
* @param length number of bases to read from the stream.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public byte[] read( int length ) throws IOException {
|
||||
byte[] bwt = new byte[length];
|
||||
int packedWord = 0;
|
||||
|
||||
final int basesPerEntry = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BASE;
|
||||
for( int i = 0; i < length; i++ ) {
|
||||
if( i % basesPerEntry == 0 ) {
|
||||
buffer.rewind();
|
||||
targetInputStream.read(buffer.array());
|
||||
packedWord = buffer.getInt();
|
||||
}
|
||||
|
||||
int position = basesPerEntry - i % basesPerEntry - 1;
|
||||
bwt[i] = PackUtils.unpackBase((byte)((packedWord >> position*PackUtils.BITS_PER_BASE) & 0x3));
|
||||
}
|
||||
|
||||
return bwt;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -50,6 +50,15 @@ public class BasePackedOutputStream<T> {
|
|||
this.buffer = ByteBuffer.allocate(basesPerType/PackUtils.ALPHABET_SIZE).order(byteOrder);
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the given base to the output stream. Will write only this base; no packing will be performed.
|
||||
* @param base List of bases to write.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void write( int base ) throws IOException {
|
||||
write( new byte[] { (byte)base } );
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes an array of bases to the target output stream.
|
||||
* @param bases List of bases to write.
|
||||
|
|
|
|||
|
|
@ -177,7 +177,7 @@ public class CreateBWTFromReference {
|
|||
bwtOutputStream.write(buffer.array());
|
||||
bwtOutputStream.flush();
|
||||
|
||||
PackedIntOutputStream occurrenceWriter = new PackedIntOutputStream(bwtOutputStream);
|
||||
IntPackedOutputStream occurrenceWriter = new IntPackedOutputStream(bwtOutputStream,ByteOrder.LITTLE_ENDIAN);
|
||||
occurrenceWriter.write(occurrences);
|
||||
occurrenceWriter.flush();
|
||||
|
||||
|
|
@ -186,7 +186,7 @@ public class CreateBWTFromReference {
|
|||
sequenceOutputStream.close();
|
||||
|
||||
OutputStream saOutputStream = new BufferedOutputStream(new FileOutputStream(saFile));
|
||||
PackedIntOutputStream saIntWriter = new PackedIntOutputStream(saOutputStream);
|
||||
IntPackedOutputStream saIntWriter = new IntPackedOutputStream(saOutputStream,ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
// SA file format is 'primary' (= SA-1[0]?), occurrence array, interval, sequence length, SA[]
|
||||
saIntWriter.write(inverseSuffixArray[0]);
|
||||
|
|
@ -198,8 +198,17 @@ public class CreateBWTFromReference {
|
|||
saIntWriter.close();
|
||||
|
||||
File existingBwtFile = new File(inputFileName+".bwt");
|
||||
WordPackedInputStream inputStream = new WordPackedInputStream(existingBwtFile,ByteOrder.LITTLE_ENDIAN);
|
||||
byte[] existingBwt = inputStream.read();
|
||||
InputStream existingBwtStream = new BufferedInputStream(new FileInputStream(existingBwtFile));
|
||||
|
||||
IntPackedInputStream existingIntReader = new IntPackedInputStream(existingBwtStream,ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
int existingFirstInverseSA = existingIntReader.read();
|
||||
|
||||
int[] existingOccurrences = new int[4];
|
||||
existingIntReader.read(existingOccurrences);
|
||||
|
||||
BasePackedInputStream inputStream = new BasePackedInputStream<Integer>(Integer.class,existingBwtStream,ByteOrder.LITTLE_ENDIAN);
|
||||
byte[] existingBwt = inputStream.read(existingOccurrences[3]);
|
||||
|
||||
String existingBwtAsString = new String(existingBwt);
|
||||
System.out.printf("Existing BWT: %s...%n",existingBwtAsString.substring(0,80));
|
||||
|
|
|
|||
|
|
@ -0,0 +1,76 @@
|
|||
package org.broadinstitute.sting.bwa;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
/**
|
||||
* Read a set of integers packed into
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class IntPackedInputStream {
|
||||
/**
|
||||
* Ultimate target for the occurrence array.
|
||||
*/
|
||||
private final InputStream targetInputStream;
|
||||
|
||||
/**
|
||||
* The byte order in which integer input data appears.
|
||||
*/
|
||||
private final ByteBuffer buffer;
|
||||
|
||||
/**
|
||||
* Create a new PackedIntInputStream, writing to the given target file.
|
||||
* @param inputFile target input file.
|
||||
* @param byteOrder Endianness to use when writing a list of integers.
|
||||
* @throws java.io.IOException if an I/O error occurs.
|
||||
*/
|
||||
public IntPackedInputStream(File inputFile, ByteOrder byteOrder) throws IOException {
|
||||
this(new FileInputStream(inputFile),byteOrder);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read ints from the given InputStream.
|
||||
* @param inputStream Input stream from which to read ints.
|
||||
* @param byteOrder Endianness to use when writing a list of integers.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public IntPackedInputStream(InputStream inputStream, ByteOrder byteOrder) throws IOException {
|
||||
this.targetInputStream = inputStream;
|
||||
this.buffer = ByteBuffer.allocate(PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE).order(byteOrder);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a datum from the input stream.
|
||||
* @return The next input datum in the stream.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public int read() throws IOException {
|
||||
int[] data = new int[1];
|
||||
read(data);
|
||||
return data[0];
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the data from the input stream.
|
||||
* @param data placeholder for input data.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void read( int[] data ) throws IOException {
|
||||
for(int i = 0; i < data.length; i++) {
|
||||
targetInputStream.read(buffer.array());
|
||||
data[i] = buffer.getInt();
|
||||
buffer.rewind();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the given output stream.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void close() throws IOException {
|
||||
targetInputStream.close();
|
||||
}
|
||||
}
|
||||
|
|
@ -25,45 +25,46 @@
|
|||
|
||||
package org.broadinstitute.sting.bwa;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
/**
|
||||
* Writes an occurrence array to the output file.
|
||||
* Writes a list of integers to the output file.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class PackedIntOutputStream {
|
||||
/**
|
||||
* How many bytes does it take to hold an integer in Java?
|
||||
*/
|
||||
private static final int INT_SIZE_IN_BYTES = 4;
|
||||
|
||||
public class IntPackedOutputStream {
|
||||
/**
|
||||
* Ultimate target for the occurrence array.
|
||||
*/
|
||||
private final OutputStream targetOutputStream;
|
||||
|
||||
/**
|
||||
* A fixed-size buffer for int-packed data.
|
||||
*/
|
||||
private final ByteBuffer buffer;
|
||||
|
||||
/**
|
||||
* Create a new IntPackedOutputStream, writing to the given target file.
|
||||
* @param outputFile target output file.
|
||||
* @param byteOrder Endianness to use when writing a list of integers.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public PackedIntOutputStream( File outputFile ) throws IOException {
|
||||
this(new FileOutputStream(outputFile));
|
||||
public IntPackedOutputStream(File outputFile, ByteOrder byteOrder) throws IOException {
|
||||
this(new FileOutputStream(outputFile),byteOrder);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write packed ints to the given OutputStream.
|
||||
* @param outputStream Output stream to which to write packed ints.
|
||||
* @param byteOrder Endianness to use when writing a list of integers.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public PackedIntOutputStream( OutputStream outputStream ) throws IOException {
|
||||
public IntPackedOutputStream(OutputStream outputStream, ByteOrder byteOrder) throws IOException {
|
||||
this.targetOutputStream = outputStream;
|
||||
buffer = ByteBuffer.allocate(PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE).order(byteOrder);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -72,7 +73,7 @@ public class PackedIntOutputStream {
|
|||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void write( int datum ) throws IOException {
|
||||
ByteBuffer buffer = ByteBuffer.allocate(INT_SIZE_IN_BYTES).order(ByteOrder.LITTLE_ENDIAN);
|
||||
buffer.rewind();
|
||||
buffer.putInt(datum);
|
||||
targetOutputStream.write(buffer.array());
|
||||
}
|
||||
|
|
@ -87,6 +88,13 @@ public class PackedIntOutputStream {
|
|||
write(datum);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the given chunk of data to the output stream.
|
||||
* @param data data to write.
|
||||
* @param offset position at which to start.
|
||||
* @param length number of ints to write.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void write( int[] data, int offset, int length ) throws IOException {
|
||||
for( int i = offset; i < offset+length; i++ )
|
||||
write(data[i]);
|
||||
|
|
@ -1,61 +0,0 @@
|
|||
package org.broadinstitute.sting.bwa;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* Reads a word-packed version of the input stream.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class WordPackedInputStream {
|
||||
|
||||
/**
|
||||
* Ultimate source for packed bases.
|
||||
*/
|
||||
private final InputStream targetInputStream;
|
||||
|
||||
/**
|
||||
* A fixed-size buffer for word-packed data.
|
||||
*/
|
||||
private final ByteBuffer buffer;
|
||||
|
||||
public WordPackedInputStream( File inputFile, ByteOrder byteOrder ) throws FileNotFoundException {
|
||||
this.targetInputStream = new BufferedInputStream(new FileInputStream(inputFile));
|
||||
this.buffer = ByteBuffer.allocate(PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE).order(byteOrder);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the entire contents of the input stream.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public byte[] read() throws IOException {
|
||||
// Skip over header info.
|
||||
for( int i = 0; i < 5; i++ ) {
|
||||
targetInputStream.read(buffer.array());
|
||||
System.out.println("Skipping over: " + buffer.getInt());
|
||||
buffer.rewind();
|
||||
}
|
||||
|
||||
List<Byte> bwtList = new ArrayList<Byte>();
|
||||
while(targetInputStream.read(buffer.array()) > 0) {
|
||||
int packedWord = buffer.getInt();
|
||||
for( int i = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BASE - 1; i >= 0; i-- ) {
|
||||
byte packedByte = (byte)((packedWord >> i*2) & 0x3);
|
||||
bwtList.add(PackUtils.unpackBase(packedByte));
|
||||
}
|
||||
buffer.rewind();
|
||||
}
|
||||
|
||||
byte[] bwt = new byte[bwtList.size()];
|
||||
for(int i = 0; i < bwtList.size(); i++)
|
||||
bwt[i] = bwtList.get(i);
|
||||
|
||||
return bwt;
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue