Successfully writing .sa files.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1549 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
600c234643
commit
f22f590192
|
|
@ -99,6 +99,13 @@ public class CreateBWTFromReference {
|
|||
return compressedSuffixArray;
|
||||
}
|
||||
|
||||
private int[] createInversedCompressedSuffixArray( int[] compressedSuffixArray ) {
|
||||
int[] inverseCompressedSuffixArray = new int[compressedSuffixArray.length];
|
||||
for( int i = 0; i < compressedSuffixArray.length; i++ )
|
||||
inverseCompressedSuffixArray[compressedSuffixArray[i]] = i;
|
||||
return inverseCompressedSuffixArray;
|
||||
}
|
||||
|
||||
private byte[] createBWT( String sequence, int[] suffixArray ) {
|
||||
byte[] bwt = new byte[suffixArray.length];
|
||||
for( int i = 0; i < suffixArray.length; i++ ) {
|
||||
|
|
@ -111,16 +118,19 @@ public class CreateBWTFromReference {
|
|||
}
|
||||
|
||||
public static void main( String argv[] ) throws IOException {
|
||||
if( argv.length != 2 ) {
|
||||
System.out.println("USAGE: CreateBWTFromReference <input>.fasta <output>");
|
||||
if( argv.length != 3 ) {
|
||||
System.out.println("USAGE: CreateBWTFromReference <input>.fasta <output bwt> <output sa>");
|
||||
return;
|
||||
}
|
||||
|
||||
String inputFileName = argv[0];
|
||||
File inputFile = new File(inputFileName);
|
||||
|
||||
String outputFileName = argv[1];
|
||||
File outputFile = new File(outputFileName);
|
||||
String bwtFileName = argv[1];
|
||||
File bwtFile = new File(bwtFileName);
|
||||
|
||||
String saFileName = argv[2];
|
||||
File saFile = new File(saFileName);
|
||||
|
||||
CreateBWTFromReference creator = new CreateBWTFromReference();
|
||||
|
||||
|
|
@ -144,6 +154,12 @@ public class CreateBWTFromReference {
|
|||
reconstructedInverseSA = compressedSuffixArray[reconstructedInverseSA];
|
||||
}
|
||||
|
||||
// Create the data structure for the inverse compressed suffix array and print diagnostics.
|
||||
int[] inverseCompressedSuffixArray = creator.createInversedCompressedSuffixArray(compressedSuffixArray);
|
||||
for( int i = 0; i < 8; i++ ) {
|
||||
System.out.printf("inverseCompressedSuffixArray[%d] = %d%n", i, inverseCompressedSuffixArray[i]);
|
||||
}
|
||||
|
||||
// Count the occurences of each given base.
|
||||
int[] occurrences = creator.countOccurrences(sequence);
|
||||
System.out.printf("Occurrences: a=%d, c=%d, g=%d, t=%d%n",occurrences[0],occurrences[1],occurrences[2],occurrences[3]);
|
||||
|
|
@ -152,27 +168,39 @@ public class CreateBWTFromReference {
|
|||
byte[] bwt = creator.createBWT(sequence, suffixArray);
|
||||
|
||||
String bwtAsString = new String(bwt);
|
||||
System.out.printf("BWT:%n");
|
||||
//System.out.printf("BWT:%n");
|
||||
while( bwtAsString.length() > 0 ) {
|
||||
int end = Math.min( 80, bwtAsString.length() );
|
||||
//System.out.printf("%s%n", bwtAsString.substring(0,end));
|
||||
bwtAsString = bwtAsString.substring(end);
|
||||
}
|
||||
|
||||
OutputStream outputStream = new BufferedOutputStream(new FileOutputStream(outputFile));
|
||||
OutputStream bwtOutputStream = new BufferedOutputStream(new FileOutputStream(bwtFile));
|
||||
|
||||
ByteBuffer buffer = ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN);
|
||||
buffer.putInt(inverseSuffixArray[0]);
|
||||
outputStream.write(buffer.array());
|
||||
outputStream.flush();
|
||||
bwtOutputStream.write(buffer.array());
|
||||
bwtOutputStream.flush();
|
||||
|
||||
OccurrenceOutputStream occurrenceWriter = new OccurrenceOutputStream(outputStream);
|
||||
PackedIntOutputStream occurrenceWriter = new PackedIntOutputStream(bwtOutputStream);
|
||||
occurrenceWriter.write(occurrences);
|
||||
occurrenceWriter.flush();
|
||||
|
||||
WordPackedOutputStream bwtOutputStream = new WordPackedOutputStream(outputStream,ByteOrder.LITTLE_ENDIAN);
|
||||
bwtOutputStream.write(bwt);
|
||||
bwtOutputStream.close();
|
||||
WordPackedOutputStream sequenceOutputStream = new WordPackedOutputStream(bwtOutputStream,ByteOrder.LITTLE_ENDIAN);
|
||||
sequenceOutputStream.write(bwt);
|
||||
sequenceOutputStream.close();
|
||||
|
||||
OutputStream saOutputStream = new BufferedOutputStream(new FileOutputStream(saFile));
|
||||
PackedIntOutputStream saIntWriter = new PackedIntOutputStream(saOutputStream);
|
||||
|
||||
// SA file format is 'primary' (= SA-1[0]?), occurrence array, interval, sequence length, SA[]
|
||||
saIntWriter.write(inverseSuffixArray[0]);
|
||||
saIntWriter.write(occurrences);
|
||||
saIntWriter.write(1);
|
||||
saIntWriter.write(suffixArray.length-1);
|
||||
saIntWriter.write(suffixArray, 1, suffixArray.length-1);
|
||||
|
||||
saIntWriter.close();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ import java.nio.ByteOrder;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class OccurrenceOutputStream {
|
||||
public class PackedIntOutputStream {
|
||||
/**
|
||||
* How many bytes does it take to hold an integer in Java?
|
||||
*/
|
||||
|
|
@ -49,35 +49,47 @@ public class OccurrenceOutputStream {
|
|||
private final OutputStream targetOutputStream;
|
||||
|
||||
/**
|
||||
* Create a new OccurrenceArrayOutputStream, writing to the given target file.
|
||||
* Create a new PackedIntOutputStream, writing to the given target file.
|
||||
* @param outputFile target output file.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public OccurrenceOutputStream( File outputFile ) throws IOException {
|
||||
public PackedIntOutputStream( File outputFile ) throws IOException {
|
||||
this(new FileOutputStream(outputFile));
|
||||
}
|
||||
|
||||
/**
|
||||
* Write occurrence array to the given OutputStream.
|
||||
* @param outputStream Output stream to which to write packed bases.
|
||||
* Write packed ints to the given OutputStream.
|
||||
* @param outputStream Output stream to which to write packed ints.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public OccurrenceOutputStream( OutputStream outputStream ) throws IOException {
|
||||
public PackedIntOutputStream( OutputStream outputStream ) throws IOException {
|
||||
this.targetOutputStream = outputStream;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the cumulative occurrences to the output stream.
|
||||
* @param occurrences occurrences to write. occurrences.length must match alphabet size.
|
||||
* Write the data to the output stream.
|
||||
* @param datum datum to write.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void write( int[] occurrences ) throws IOException {
|
||||
if( occurrences.length > BytePackedOutputStream.ALPHABET_SIZE )
|
||||
throw new StingException("Wrong number of occurrence data points; expected " + BytePackedOutputStream.ALPHABET_SIZE);
|
||||
ByteBuffer buffer = ByteBuffer.allocate(INT_SIZE_IN_BYTES*occurrences.length).order(ByteOrder.LITTLE_ENDIAN);
|
||||
for(int occurrence: occurrences)
|
||||
buffer.putInt(occurrence);
|
||||
targetOutputStream.write(buffer.array());
|
||||
public void write( int datum ) throws IOException {
|
||||
ByteBuffer buffer = ByteBuffer.allocate(INT_SIZE_IN_BYTES).order(ByteOrder.LITTLE_ENDIAN);
|
||||
buffer.putInt(datum);
|
||||
targetOutputStream.write(buffer.array());
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the data to the output stream.
|
||||
* @param data data to write. occurrences.length must match alphabet size.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void write( int[] data ) throws IOException {
|
||||
for(int datum: data)
|
||||
write(datum);
|
||||
}
|
||||
|
||||
public void write( int[] data, int offset, int length ) throws IOException {
|
||||
for( int i = offset; i < offset+length; i++ )
|
||||
write(data[i]);
|
||||
}
|
||||
|
||||
/**
|
||||
Loading…
Reference in New Issue