Successfully writing .sa files.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1549 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-09-08 17:34:34 +00:00
parent 600c234643
commit f22f590192
2 changed files with 67 additions and 27 deletions

View File

@ -99,6 +99,13 @@ public class CreateBWTFromReference {
return compressedSuffixArray; return compressedSuffixArray;
} }
/**
 * Build the inverse of the given compressed suffix array: for every index i,
 * the returned array maps the value compressedSuffixArray[i] back to i.
 * @param compressedSuffixArray array to invert; each value is used as an index into the result.
 * @return a newly allocated array, the same length as the input, holding the inverse mapping.
 */
private int[] createInversedCompressedSuffixArray( int[] compressedSuffixArray ) {
    final int size = compressedSuffixArray.length;
    final int[] inverse = new int[size];
    // Walk the forward mapping in order, recording where each value came from.
    int position = 0;
    for( int target: compressedSuffixArray )
        inverse[target] = position++;
    return inverse;
}
private byte[] createBWT( String sequence, int[] suffixArray ) { private byte[] createBWT( String sequence, int[] suffixArray ) {
byte[] bwt = new byte[suffixArray.length]; byte[] bwt = new byte[suffixArray.length];
for( int i = 0; i < suffixArray.length; i++ ) { for( int i = 0; i < suffixArray.length; i++ ) {
@ -111,16 +118,19 @@ public class CreateBWTFromReference {
} }
public static void main( String argv[] ) throws IOException { public static void main( String argv[] ) throws IOException {
if( argv.length != 2 ) { if( argv.length != 3 ) {
System.out.println("USAGE: CreateBWTFromReference <input>.fasta <output>"); System.out.println("USAGE: CreateBWTFromReference <input>.fasta <output bwt> <output sa>");
return; return;
} }
String inputFileName = argv[0]; String inputFileName = argv[0];
File inputFile = new File(inputFileName); File inputFile = new File(inputFileName);
String outputFileName = argv[1]; String bwtFileName = argv[1];
File outputFile = new File(outputFileName); File bwtFile = new File(bwtFileName);
String saFileName = argv[2];
File saFile = new File(saFileName);
CreateBWTFromReference creator = new CreateBWTFromReference(); CreateBWTFromReference creator = new CreateBWTFromReference();
@ -144,6 +154,12 @@ public class CreateBWTFromReference {
reconstructedInverseSA = compressedSuffixArray[reconstructedInverseSA]; reconstructedInverseSA = compressedSuffixArray[reconstructedInverseSA];
} }
// Create the data structure for the inverse compressed suffix array and print diagnostics.
int[] inverseCompressedSuffixArray = creator.createInversedCompressedSuffixArray(compressedSuffixArray);
for( int i = 0; i < 8; i++ ) {
System.out.printf("inverseCompressedSuffixArray[%d] = %d%n", i, inverseCompressedSuffixArray[i]);
}
// Count the occurences of each given base. // Count the occurences of each given base.
int[] occurrences = creator.countOccurrences(sequence); int[] occurrences = creator.countOccurrences(sequence);
System.out.printf("Occurrences: a=%d, c=%d, g=%d, t=%d%n",occurrences[0],occurrences[1],occurrences[2],occurrences[3]); System.out.printf("Occurrences: a=%d, c=%d, g=%d, t=%d%n",occurrences[0],occurrences[1],occurrences[2],occurrences[3]);
@ -152,27 +168,39 @@ public class CreateBWTFromReference {
byte[] bwt = creator.createBWT(sequence, suffixArray); byte[] bwt = creator.createBWT(sequence, suffixArray);
String bwtAsString = new String(bwt); String bwtAsString = new String(bwt);
System.out.printf("BWT:%n"); //System.out.printf("BWT:%n");
while( bwtAsString.length() > 0 ) { while( bwtAsString.length() > 0 ) {
int end = Math.min( 80, bwtAsString.length() ); int end = Math.min( 80, bwtAsString.length() );
//System.out.printf("%s%n", bwtAsString.substring(0,end)); //System.out.printf("%s%n", bwtAsString.substring(0,end));
bwtAsString = bwtAsString.substring(end); bwtAsString = bwtAsString.substring(end);
} }
OutputStream outputStream = new BufferedOutputStream(new FileOutputStream(outputFile)); OutputStream bwtOutputStream = new BufferedOutputStream(new FileOutputStream(bwtFile));
ByteBuffer buffer = ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN); ByteBuffer buffer = ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN);
buffer.putInt(inverseSuffixArray[0]); buffer.putInt(inverseSuffixArray[0]);
outputStream.write(buffer.array()); bwtOutputStream.write(buffer.array());
outputStream.flush(); bwtOutputStream.flush();
OccurrenceOutputStream occurrenceWriter = new OccurrenceOutputStream(outputStream); PackedIntOutputStream occurrenceWriter = new PackedIntOutputStream(bwtOutputStream);
occurrenceWriter.write(occurrences); occurrenceWriter.write(occurrences);
occurrenceWriter.flush(); occurrenceWriter.flush();
WordPackedOutputStream bwtOutputStream = new WordPackedOutputStream(outputStream,ByteOrder.LITTLE_ENDIAN); WordPackedOutputStream sequenceOutputStream = new WordPackedOutputStream(bwtOutputStream,ByteOrder.LITTLE_ENDIAN);
bwtOutputStream.write(bwt); sequenceOutputStream.write(bwt);
bwtOutputStream.close(); sequenceOutputStream.close();
OutputStream saOutputStream = new BufferedOutputStream(new FileOutputStream(saFile));
PackedIntOutputStream saIntWriter = new PackedIntOutputStream(saOutputStream);
// SA file format is 'primary' (= SA-1[0]?), occurrence array, interval, sequence length, SA[]
saIntWriter.write(inverseSuffixArray[0]);
saIntWriter.write(occurrences);
saIntWriter.write(1);
saIntWriter.write(suffixArray.length-1);
saIntWriter.write(suffixArray, 1, suffixArray.length-1);
saIntWriter.close();
} }
/** /**

View File

@ -37,7 +37,7 @@ import java.nio.ByteOrder;
* @author mhanna * @author mhanna
* @version 0.1 * @version 0.1
*/ */
public class OccurrenceOutputStream { public class PackedIntOutputStream {
/** /**
* How many bytes does it take to hold an integer in Java? * How many bytes does it take to hold an integer in Java?
*/ */
@ -49,35 +49,47 @@ public class OccurrenceOutputStream {
private final OutputStream targetOutputStream; private final OutputStream targetOutputStream;
/** /**
* Create a new OccurrenceArrayOutputStream, writing to the given target file. * Create a new PackedIntOutputStream, writing to the given target file.
* @param outputFile target output file. * @param outputFile target output file.
* @throws IOException if an I/O error occurs. * @throws IOException if an I/O error occurs.
*/ */
public OccurrenceOutputStream( File outputFile ) throws IOException { public PackedIntOutputStream( File outputFile ) throws IOException {
this(new FileOutputStream(outputFile)); this(new FileOutputStream(outputFile));
} }
/** /**
* Write occurrence array to the given OutputStream. * Write packed ints to the given OutputStream.
* @param outputStream Output stream to which to write packed bases. * @param outputStream Output stream to which to write packed ints.
* @throws IOException if an I/O error occurs. * @throws IOException if an I/O error occurs.
*/ */
public OccurrenceOutputStream( OutputStream outputStream ) throws IOException { public PackedIntOutputStream( OutputStream outputStream ) throws IOException {
this.targetOutputStream = outputStream; this.targetOutputStream = outputStream;
} }
/** /**
* Write the cumulative occurrences to the output stream. * Write the data to the output stream.
* @param occurrences occurrences to write. occurrences.length must match alphabet size. * @param datum datum to write.
* @throws IOException if an I/O error occurs. * @throws IOException if an I/O error occurs.
*/ */
public void write( int[] occurrences ) throws IOException { public void write( int datum ) throws IOException {
if( occurrences.length > BytePackedOutputStream.ALPHABET_SIZE ) ByteBuffer buffer = ByteBuffer.allocate(INT_SIZE_IN_BYTES).order(ByteOrder.LITTLE_ENDIAN);
throw new StingException("Wrong number of occurrence data points; expected " + BytePackedOutputStream.ALPHABET_SIZE); buffer.putInt(datum);
ByteBuffer buffer = ByteBuffer.allocate(INT_SIZE_IN_BYTES*occurrences.length).order(ByteOrder.LITTLE_ENDIAN); targetOutputStream.write(buffer.array());
for(int occurrence: occurrences) }
buffer.putInt(occurrence);
targetOutputStream.write(buffer.array()); /**
* Write the data to the output stream.
* @param data data to write; every element is written, in array order.
* @throws IOException if an I/O error occurs.
*/
public void write( int[] data ) throws IOException {
    // Emit every element in array order, delegating to the single-int writer.
    for( int index = 0; index < data.length; index++ )
        write(data[index]);
}
public void write( int[] data, int offset, int length ) throws IOException {
for( int i = offset; i < offset+length; i++ )
write(data[i]);
} }
/** /**