Cleanup. Separate common packing functionality into utils class. Make base packing utility as generic as possible.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1566 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3b1e966b4c
commit
43d1c6741c
|
|
@ -0,0 +1,120 @@
|
||||||
|
package org.broadinstitute.sting.bwa;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A general-purpose stream for writing packed bases.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class BasePackedOutputStream<T> {
|
||||||
|
/**
|
||||||
|
* Type of object to pack.
|
||||||
|
*/
|
||||||
|
private final Class<T> type;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How many bases can be stored in the given data structure?
|
||||||
|
*/
|
||||||
|
private final int basesPerType;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ultimate target for the packed bases.
|
||||||
|
*/
|
||||||
|
private final OutputStream targetOutputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A fixed-size buffer for word-packed data.
|
||||||
|
*/
|
||||||
|
private final ByteBuffer buffer;
|
||||||
|
|
||||||
|
public BasePackedOutputStream( Class<T> type, File outputFile, ByteOrder byteOrder ) throws FileNotFoundException {
|
||||||
|
this(type,new BufferedOutputStream(new FileOutputStream(outputFile)),byteOrder);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write packed bases to the given output stream.
|
||||||
|
* @param type Type of data to pack bases into.
|
||||||
|
* @param outputStream Output stream to which to write packed bases.
|
||||||
|
* @param byteOrder Switch between big endian / little endian when reading / writing files.
|
||||||
|
*/
|
||||||
|
public BasePackedOutputStream( Class<T> type, OutputStream outputStream, ByteOrder byteOrder) {
|
||||||
|
this.targetOutputStream = outputStream;
|
||||||
|
this.type = type;
|
||||||
|
basesPerType = PackUtils.bitsInType(type)/PackUtils.BITS_PER_BASE;
|
||||||
|
this.buffer = ByteBuffer.allocate(basesPerType/PackUtils.ALPHABET_SIZE).order(byteOrder);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes an array of bases to the target output stream.
|
||||||
|
* @param bases List of bases to write.
|
||||||
|
* @throws IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public void write( byte[] bases ) throws IOException {
|
||||||
|
int packedBases = 0;
|
||||||
|
int positionInPack = 0;
|
||||||
|
|
||||||
|
for(byte base: bases) {
|
||||||
|
packedBases = packBase(base, packedBases, positionInPack);
|
||||||
|
|
||||||
|
// Increment the packed counter. If all possible bases have been squeezed into this byte, write it out.
|
||||||
|
positionInPack = ++positionInPack % basesPerType;
|
||||||
|
if( positionInPack == 0 ) {
|
||||||
|
writePackedBases(packedBases);
|
||||||
|
packedBases = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if( positionInPack > 0 )
|
||||||
|
writePackedBases(packedBases);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Flush the contents of the OutputStream to disk.
|
||||||
|
* @throws IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public void flush() throws IOException {
|
||||||
|
targetOutputStream.flush();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Closes the given output stream.
|
||||||
|
* @throws IOException if an I/O error occurs.
|
||||||
|
*/
|
||||||
|
public void close() throws IOException {
|
||||||
|
targetOutputStream.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Pack the given base into the basepack.
|
||||||
|
* @param base The base to pack.
|
||||||
|
* @param basePack Target for the pack operation.
|
||||||
|
* @param position Position within the pack to which to add the base.
|
||||||
|
* @return The packed integer.
|
||||||
|
*/
|
||||||
|
private int packBase( byte base, int basePack, int position ) {
|
||||||
|
basePack |= (PackUtils.packBase(base) << 2*(basesPerType-position-1));
|
||||||
|
return basePack;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write the given packed base structure to the output file.
|
||||||
|
* @param packedBases Packed bases to write.
|
||||||
|
* @throws IOException on error writing to the file.
|
||||||
|
*/
|
||||||
|
private void writePackedBases(int packedBases) throws IOException {
|
||||||
|
buffer.rewind();
|
||||||
|
if( type == Integer.class )
|
||||||
|
buffer.putInt(packedBases);
|
||||||
|
else if( type == Byte.class )
|
||||||
|
buffer.put((byte)packedBases);
|
||||||
|
else
|
||||||
|
throw new StingException("Cannot pack bases into type " + type.getName());
|
||||||
|
targetOutputStream.write(buffer.array());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,157 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2009 The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.bwa;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
|
||||||
|
|
||||||
import java.io.*;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Write packed bases to an output stream. Pack each base into 2 bits.
|
|
||||||
*
|
|
||||||
* @author mhanna
|
|
||||||
* @version 0.1
|
|
||||||
*/
|
|
||||||
public class BytePackedOutputStream {
|
|
||||||
/**
|
|
||||||
* How many possible bases can be encoded?
|
|
||||||
*/
|
|
||||||
public static final int ALPHABET_SIZE = 4;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Ultimate target for the packed bases.
|
|
||||||
*/
|
|
||||||
private final OutputStream targetOutputStream;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The next byte to write to the output stream. Will be added
|
|
||||||
* to the output stream when enough bases are accumulated, or when
|
|
||||||
* the file is closed.
|
|
||||||
*/
|
|
||||||
private byte packedBases;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Where will the next base be embedded into packedBases?
|
|
||||||
*/
|
|
||||||
private int positionInPack = 0;
|
|
||||||
|
|
||||||
public BytePackedOutputStream( File outputFile ) throws FileNotFoundException {
|
|
||||||
this(new BufferedOutputStream(new FileOutputStream(outputFile)));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Write packed bases to the given output stream.
|
|
||||||
* @param outputStream Output stream to which to write packed bases.
|
|
||||||
*/
|
|
||||||
public BytePackedOutputStream( OutputStream outputStream ) {
|
|
||||||
this.targetOutputStream = outputStream;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Write a given base to the output stream.
|
|
||||||
* @param base Base to write.
|
|
||||||
* @throws IOException if an I/O error occurs.
|
|
||||||
*/
|
|
||||||
public void write( byte base ) throws IOException {
|
|
||||||
packedBases |= (getPackedRepresentation(base) << 2*(ALPHABET_SIZE-positionInPack-1));
|
|
||||||
|
|
||||||
// Increment the packed counter. If all possible bases have been squeezed into this byte, write it out.
|
|
||||||
positionInPack = ++positionInPack % ALPHABET_SIZE;
|
|
||||||
if( positionInPack == 0 ) {
|
|
||||||
targetOutputStream.write(packedBases);
|
|
||||||
packedBases = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Writes an array of bases to the target output stream.
|
|
||||||
* @param bases List of bases to write.
|
|
||||||
* @throws IOException if an I/O error occurs.
|
|
||||||
*/
|
|
||||||
public void write( byte[] bases ) throws IOException {
|
|
||||||
for(byte base: bases) write(base);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Flush the contents of the OutputStream to disk.
|
|
||||||
* @throws IOException if an I/O error occurs.
|
|
||||||
*/
|
|
||||||
public void flush() throws IOException {
|
|
||||||
targetOutputStream.flush();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Closes the given output stream.
|
|
||||||
* @throws IOException if an I/O error occurs.
|
|
||||||
*/
|
|
||||||
public void close() throws IOException {
|
|
||||||
// Write (incomplete) block in file, and number of bases in that last byte.
|
|
||||||
if( positionInPack > 0 ) {
|
|
||||||
targetOutputStream.write(packedBases);
|
|
||||||
targetOutputStream.write(positionInPack);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
targetOutputStream.write(ALPHABET_SIZE);
|
|
||||||
|
|
||||||
targetOutputStream.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the two-bit representation of a base. A=00b, C=01b, G=10b, T=11b.
|
|
||||||
* @param base ASCII value for the base to pack.
|
|
||||||
* @return A byte from 0-3 indicating the base's packed value.
|
|
||||||
*/
|
|
||||||
public static byte getPackedRepresentation(byte base) {
|
|
||||||
switch( base ) {
|
|
||||||
case 'A':
|
|
||||||
return 0;
|
|
||||||
case 'C':
|
|
||||||
return 1;
|
|
||||||
case 'G':
|
|
||||||
return 2;
|
|
||||||
case 'T':
|
|
||||||
return 3;
|
|
||||||
default:
|
|
||||||
throw new StingException("Unknown base type: " + base);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static byte decodePackedRepresentation(byte pack) {
|
|
||||||
switch( pack ) {
|
|
||||||
case 0:
|
|
||||||
return 'A';
|
|
||||||
case 1:
|
|
||||||
return 'C';
|
|
||||||
case 2:
|
|
||||||
return 'G';
|
|
||||||
case 3:
|
|
||||||
return 'T';
|
|
||||||
default:
|
|
||||||
throw new StingException("Unknown pack type: " + pack);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -33,7 +33,6 @@ import net.sf.samtools.util.StringUtil;
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.ByteOrder;
|
import java.nio.ByteOrder;
|
||||||
|
|
||||||
|
|
@ -56,7 +55,7 @@ public class CreateBWTFromReference {
|
||||||
private int[] countOccurrences( String sequence ) {
|
private int[] countOccurrences( String sequence ) {
|
||||||
int occurrences[] = new int[ALPHABET_SIZE];
|
int occurrences[] = new int[ALPHABET_SIZE];
|
||||||
for( char base: sequence.toCharArray() )
|
for( char base: sequence.toCharArray() )
|
||||||
occurrences[ BytePackedOutputStream.getPackedRepresentation((byte)base) ]++;
|
occurrences[PackUtils.packBase((byte)base)]++;
|
||||||
|
|
||||||
// Make occurrences cumulative
|
// Make occurrences cumulative
|
||||||
for( int i = 1; i < ALPHABET_SIZE; i++ )
|
for( int i = 1; i < ALPHABET_SIZE; i++ )
|
||||||
|
|
@ -182,7 +181,7 @@ public class CreateBWTFromReference {
|
||||||
occurrenceWriter.write(occurrences);
|
occurrenceWriter.write(occurrences);
|
||||||
occurrenceWriter.flush();
|
occurrenceWriter.flush();
|
||||||
|
|
||||||
WordPackedOutputStream sequenceOutputStream = new WordPackedOutputStream(bwtOutputStream,ByteOrder.LITTLE_ENDIAN);
|
BasePackedOutputStream<Integer> sequenceOutputStream = new BasePackedOutputStream<Integer>(Integer.class,bwtOutputStream,ByteOrder.LITTLE_ENDIAN);
|
||||||
sequenceOutputStream.write(bwt);
|
sequenceOutputStream.write(bwt);
|
||||||
sequenceOutputStream.close();
|
sequenceOutputStream.close();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
||||||
import net.sf.picard.reference.ReferenceSequence;
|
import net.sf.picard.reference.ReferenceSequence;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate a .PAC file from a given reference.
|
* Generate a .PAC file from a given reference.
|
||||||
|
|
@ -53,9 +54,13 @@ public class CreatePACFromReference {
|
||||||
|
|
||||||
// Target file for output
|
// Target file for output
|
||||||
File outputFile = new File(argv[1]);
|
File outputFile = new File(argv[1]);
|
||||||
BytePackedOutputStream outputStream = new BytePackedOutputStream(outputFile);
|
OutputStream outputStream = new FileOutputStream(outputFile);
|
||||||
|
|
||||||
|
BasePackedOutputStream<Byte> basePackedOutputStream = new BasePackedOutputStream<Byte>(Byte.class, outputStream, ByteOrder.BIG_ENDIAN);
|
||||||
|
basePackedOutputStream.write(sequence.getBases());
|
||||||
|
|
||||||
|
outputStream.write(sequence.getBases().length%PackUtils.ALPHABET_SIZE);
|
||||||
|
|
||||||
outputStream.write(sequence.getBases());
|
|
||||||
outputStream.close();
|
outputStream.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,82 @@
|
||||||
|
package org.broadinstitute.sting.bwa;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utilities designed for packing / unpacking bases.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class PackUtils {
|
||||||
|
/**
|
||||||
|
* How many possible bases can be encoded?
|
||||||
|
*/
|
||||||
|
public static final int ALPHABET_SIZE = 4;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How many bits does it take to store a single base?
|
||||||
|
*/
|
||||||
|
public static final int BITS_PER_BASE = (int)(Math.log(ALPHABET_SIZE)/Math.log(2));
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How many bits fit into a single byte?
|
||||||
|
*/
|
||||||
|
public static final int BITS_PER_BYTE = 8;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How many bits can a given type hold?
|
||||||
|
* @param type Type to test.
|
||||||
|
* @return Number of bits that the given type can hold.
|
||||||
|
*/
|
||||||
|
public static final int bitsInType( Class<?> type ) {
|
||||||
|
try {
|
||||||
|
long typeSize = type.getField("MAX_VALUE").getLong(null) - type.getField("MIN_VALUE").getLong(null)+1;
|
||||||
|
long intTypeSize = (long)Integer.MAX_VALUE - (long)Integer.MIN_VALUE + 1;
|
||||||
|
if( typeSize > intTypeSize )
|
||||||
|
throw new StingException("Cannot determine number of bits available in type: " + type.getName());
|
||||||
|
return (int)(Math.log(typeSize)/Math.log(2));
|
||||||
|
}
|
||||||
|
catch( NoSuchFieldException ex ) {
|
||||||
|
throw new StingException("Cannot determine number of bits available in type: " + type.getName(),ex);
|
||||||
|
}
|
||||||
|
catch( IllegalAccessException ex ) {
|
||||||
|
throw new StingException("Cannot determine number of bits available in type: " + type.getName(),ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the two-bit representation of a base. A=00b, C=01b, G=10b, T=11b.
|
||||||
|
* @param base ASCII value for the base to pack.
|
||||||
|
* @return A byte from 0-3 indicating the base's packed value.
|
||||||
|
*/
|
||||||
|
public static byte packBase(byte base) {
|
||||||
|
switch( base ) {
|
||||||
|
case 'A':
|
||||||
|
return 0;
|
||||||
|
case 'C':
|
||||||
|
return 1;
|
||||||
|
case 'G':
|
||||||
|
return 2;
|
||||||
|
case 'T':
|
||||||
|
return 3;
|
||||||
|
default:
|
||||||
|
throw new StingException("Unknown base type: " + base);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static byte unpackBase(byte pack) {
|
||||||
|
switch( pack ) {
|
||||||
|
case 0:
|
||||||
|
return 'A';
|
||||||
|
case 1:
|
||||||
|
return 'C';
|
||||||
|
case 2:
|
||||||
|
return 'G';
|
||||||
|
case 3:
|
||||||
|
return 'T';
|
||||||
|
default:
|
||||||
|
throw new StingException("Unknown pack type: " + pack);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -26,7 +26,7 @@ public class WordPackedInputStream {
|
||||||
|
|
||||||
public WordPackedInputStream( File inputFile, ByteOrder byteOrder ) throws FileNotFoundException {
|
public WordPackedInputStream( File inputFile, ByteOrder byteOrder ) throws FileNotFoundException {
|
||||||
this.targetInputStream = new BufferedInputStream(new FileInputStream(inputFile));
|
this.targetInputStream = new BufferedInputStream(new FileInputStream(inputFile));
|
||||||
this.buffer = ByteBuffer.allocate(WordPackedOutputStream.BASES_PER_WORD/BytePackedOutputStream.ALPHABET_SIZE).order(byteOrder);
|
this.buffer = ByteBuffer.allocate(PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE).order(byteOrder);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -44,9 +44,9 @@ public class WordPackedInputStream {
|
||||||
List<Byte> bwtList = new ArrayList<Byte>();
|
List<Byte> bwtList = new ArrayList<Byte>();
|
||||||
while(targetInputStream.read(buffer.array()) > 0) {
|
while(targetInputStream.read(buffer.array()) > 0) {
|
||||||
int packedWord = buffer.getInt();
|
int packedWord = buffer.getInt();
|
||||||
for( int i = WordPackedOutputStream.BASES_PER_WORD-1; i >= 0; i-- ) {
|
for( int i = PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BASE - 1; i >= 0; i-- ) {
|
||||||
byte packedByte = (byte)((packedWord >> i*2) & 0x3);
|
byte packedByte = (byte)((packedWord >> i*2) & 0x3);
|
||||||
bwtList.add(BytePackedOutputStream.decodePackedRepresentation(packedByte));
|
bwtList.add(PackUtils.unpackBase(packedByte));
|
||||||
}
|
}
|
||||||
buffer.rewind();
|
buffer.rewind();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,131 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2009 The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.bwa;
|
|
||||||
|
|
||||||
import java.io.*;
|
|
||||||
import java.nio.ByteBuffer;
|
|
||||||
import java.nio.ByteOrder;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Word-pack bases into the output stream. Bytes are stored as
|
|
||||||
* little-endian unsigned ints.
|
|
||||||
*
|
|
||||||
* @author mhanna
|
|
||||||
* @version 0.1
|
|
||||||
*/
|
|
||||||
public class WordPackedOutputStream {
|
|
||||||
/**
|
|
||||||
* How many bases can be stored in the given word?
|
|
||||||
*/
|
|
||||||
public static final int BASES_PER_WORD = 16;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Ultimate target for the packed bases.
|
|
||||||
*/
|
|
||||||
private final OutputStream targetOutputStream;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The next byte to write to the output stream. Will be added
|
|
||||||
* to the output stream when enough bases are accumulated, or when
|
|
||||||
* the file is closed.
|
|
||||||
*/
|
|
||||||
private int packedBases;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Where will the next base be embedded into packedBases?
|
|
||||||
*/
|
|
||||||
private int positionInPack = 0;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A fixed-size buffer for word-packed data.
|
|
||||||
*/
|
|
||||||
private final ByteBuffer buffer;
|
|
||||||
|
|
||||||
public WordPackedOutputStream( File outputFile, ByteOrder byteOrder ) throws FileNotFoundException {
|
|
||||||
this(new BufferedOutputStream(new FileOutputStream(outputFile)),byteOrder);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Write packed bases to the given output stream.
|
|
||||||
* @param outputStream Output stream to which to write packed bases.
|
|
||||||
* @param byteOrder Switch between big endian / little endian when reading / writing files.
|
|
||||||
*/
|
|
||||||
public WordPackedOutputStream(OutputStream outputStream, ByteOrder byteOrder) {
|
|
||||||
this.targetOutputStream = outputStream;
|
|
||||||
this.buffer = ByteBuffer.allocate(BASES_PER_WORD/BytePackedOutputStream.ALPHABET_SIZE).order(byteOrder);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Write a given base to the output stream.
|
|
||||||
* @param base Base to write.
|
|
||||||
* @throws IOException if an I/O error occurs.
|
|
||||||
*/
|
|
||||||
public void write( byte base ) throws IOException {
|
|
||||||
packedBases |= (BytePackedOutputStream.getPackedRepresentation(base) << 2*(BASES_PER_WORD-positionInPack-1));
|
|
||||||
|
|
||||||
// Increment the packed counter. If all possible bases have been squeezed into this byte, write it out.
|
|
||||||
positionInPack = ++positionInPack % BASES_PER_WORD;
|
|
||||||
if( positionInPack == 0 ) {
|
|
||||||
buffer.rewind();
|
|
||||||
buffer.putInt(packedBases);
|
|
||||||
targetOutputStream.write(buffer.array());
|
|
||||||
packedBases = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Writes an array of bases to the target output stream.
|
|
||||||
* @param bases List of bases to write.
|
|
||||||
* @throws IOException if an I/O error occurs.
|
|
||||||
*/
|
|
||||||
public void write( byte[] bases ) throws IOException {
|
|
||||||
for(byte base: bases) write(base);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Flush the contents of the OutputStream to disk.
|
|
||||||
* @throws IOException if an I/O error occurs.
|
|
||||||
*/
|
|
||||||
public void flush() throws IOException {
|
|
||||||
targetOutputStream.flush();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Closes the given output stream.
|
|
||||||
* @throws IOException if an I/O error occurs.
|
|
||||||
*/
|
|
||||||
public void close() throws IOException {
|
|
||||||
// Write (incomplete) block in file, and number of bases in that last byte.
|
|
||||||
if( positionInPack > 0 ) {
|
|
||||||
buffer.rewind();
|
|
||||||
buffer.putInt(packedBases);
|
|
||||||
targetOutputStream.write(buffer.array());
|
|
||||||
}
|
|
||||||
targetOutputStream.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
Loading…
Reference in New Issue