From f22f590192a5c5dae81d71ea355c17486e9a2700 Mon Sep 17 00:00:00 2001 From: hanna Date: Tue, 8 Sep 2009 17:34:34 +0000 Subject: [PATCH] Successfully writing .sa files. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1549 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/bwa/CreateBWTFromReference.java | 52 ++++++++++++++----- ...Stream.java => PackedIntOutputStream.java} | 42 +++++++++------ 2 files changed, 67 insertions(+), 27 deletions(-) rename java/src/org/broadinstitute/sting/bwa/{OccurrenceOutputStream.java => PackedIntOutputStream.java} (70%) diff --git a/java/src/org/broadinstitute/sting/bwa/CreateBWTFromReference.java b/java/src/org/broadinstitute/sting/bwa/CreateBWTFromReference.java index 93a1588a8..04d6a730d 100755 --- a/java/src/org/broadinstitute/sting/bwa/CreateBWTFromReference.java +++ b/java/src/org/broadinstitute/sting/bwa/CreateBWTFromReference.java @@ -99,6 +99,13 @@ public class CreateBWTFromReference { return compressedSuffixArray; } + private int[] createInversedCompressedSuffixArray( int[] compressedSuffixArray ) { + int[] inverseCompressedSuffixArray = new int[compressedSuffixArray.length]; + for( int i = 0; i < compressedSuffixArray.length; i++ ) + inverseCompressedSuffixArray[compressedSuffixArray[i]] = i; + return inverseCompressedSuffixArray; + } + private byte[] createBWT( String sequence, int[] suffixArray ) { byte[] bwt = new byte[suffixArray.length]; for( int i = 0; i < suffixArray.length; i++ ) { @@ -111,16 +118,19 @@ public class CreateBWTFromReference { } public static void main( String argv[] ) throws IOException { - if( argv.length != 2 ) { - System.out.println("USAGE: CreateBWTFromReference .fasta "); + if( argv.length != 3 ) { + System.out.println("USAGE: CreateBWTFromReference .fasta "); return; } String inputFileName = argv[0]; File inputFile = new File(inputFileName); - String outputFileName = argv[1]; - File outputFile = new File(outputFileName); + String bwtFileName = argv[1]; + File bwtFile = 
new File(bwtFileName); + + String saFileName = argv[2]; + File saFile = new File(saFileName); CreateBWTFromReference creator = new CreateBWTFromReference(); @@ -144,6 +154,12 @@ public class CreateBWTFromReference { reconstructedInverseSA = compressedSuffixArray[reconstructedInverseSA]; } + // Create the data structure for the inverse compressed suffix array and print diagnostics. + int[] inverseCompressedSuffixArray = creator.createInversedCompressedSuffixArray(compressedSuffixArray); + for( int i = 0; i < 8; i++ ) { + System.out.printf("inverseCompressedSuffixArray[%d] = %d%n", i, inverseCompressedSuffixArray[i]); + } + // Count the occurences of each given base. int[] occurrences = creator.countOccurrences(sequence); System.out.printf("Occurrences: a=%d, c=%d, g=%d, t=%d%n",occurrences[0],occurrences[1],occurrences[2],occurrences[3]); @@ -152,27 +168,39 @@ public class CreateBWTFromReference { byte[] bwt = creator.createBWT(sequence, suffixArray); String bwtAsString = new String(bwt); - System.out.printf("BWT:%n"); + //System.out.printf("BWT:%n"); while( bwtAsString.length() > 0 ) { int end = Math.min( 80, bwtAsString.length() ); //System.out.printf("%s%n", bwtAsString.substring(0,end)); bwtAsString = bwtAsString.substring(end); } - OutputStream outputStream = new BufferedOutputStream(new FileOutputStream(outputFile)); + OutputStream bwtOutputStream = new BufferedOutputStream(new FileOutputStream(bwtFile)); ByteBuffer buffer = ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN); buffer.putInt(inverseSuffixArray[0]); - outputStream.write(buffer.array()); - outputStream.flush(); + bwtOutputStream.write(buffer.array()); + bwtOutputStream.flush(); - OccurrenceOutputStream occurrenceWriter = new OccurrenceOutputStream(outputStream); + PackedIntOutputStream occurrenceWriter = new PackedIntOutputStream(bwtOutputStream); occurrenceWriter.write(occurrences); occurrenceWriter.flush(); - WordPackedOutputStream bwtOutputStream = new 
WordPackedOutputStream(outputStream,ByteOrder.LITTLE_ENDIAN); - bwtOutputStream.write(bwt); - bwtOutputStream.close(); + WordPackedOutputStream sequenceOutputStream = new WordPackedOutputStream(bwtOutputStream,ByteOrder.LITTLE_ENDIAN); + sequenceOutputStream.write(bwt); + sequenceOutputStream.close(); + + OutputStream saOutputStream = new BufferedOutputStream(new FileOutputStream(saFile)); + PackedIntOutputStream saIntWriter = new PackedIntOutputStream(saOutputStream); + + // SA file format is 'primary' (= SA-1[0]?), occurrence array, interval, sequence length, SA[] + saIntWriter.write(inverseSuffixArray[0]); + saIntWriter.write(occurrences); + saIntWriter.write(1); + saIntWriter.write(suffixArray.length-1); + saIntWriter.write(suffixArray, 1, suffixArray.length-1); + + saIntWriter.close(); } /** diff --git a/java/src/org/broadinstitute/sting/bwa/OccurrenceOutputStream.java b/java/src/org/broadinstitute/sting/bwa/PackedIntOutputStream.java similarity index 70% rename from java/src/org/broadinstitute/sting/bwa/OccurrenceOutputStream.java rename to java/src/org/broadinstitute/sting/bwa/PackedIntOutputStream.java index 9b3bb2d7e..c530676b7 100755 --- a/java/src/org/broadinstitute/sting/bwa/OccurrenceOutputStream.java +++ b/java/src/org/broadinstitute/sting/bwa/PackedIntOutputStream.java @@ -37,7 +37,7 @@ import java.nio.ByteOrder; * @author mhanna * @version 0.1 */ -public class OccurrenceOutputStream { +public class PackedIntOutputStream { /** * How many bytes does it take to hold an integer in Java? */ @@ -49,35 +49,47 @@ public class OccurrenceOutputStream { private final OutputStream targetOutputStream; /** - * Create a new OccurrenceArrayOutputStream, writing to the given target file. + * Create a new PackedIntOutputStream, writing to the given target file. * @param outputFile target output file. * @throws IOException if an I/O error occurs. 
*/ - public OccurrenceOutputStream( File outputFile ) throws IOException { + public PackedIntOutputStream( File outputFile ) throws IOException { this(new FileOutputStream(outputFile)); } /** - * Write occurrence array to the given OutputStream. - * @param outputStream Output stream to which to write packed bases. + * Write packed ints to the given OutputStream. + * @param outputStream Output stream to which to write packed ints. * @throws IOException if an I/O error occurs. */ - public OccurrenceOutputStream( OutputStream outputStream ) throws IOException { + public PackedIntOutputStream( OutputStream outputStream ) throws IOException { this.targetOutputStream = outputStream; } /** - * Write the cumulative occurrences to the output stream. - * @param occurrences occurrences to write. occurrences.length must match alphabet size. + * Write the data to the output stream. + * @param datum datum to write. * @throws IOException if an I/O error occurs. */ - public void write( int[] occurrences ) throws IOException { - if( occurrences.length > BytePackedOutputStream.ALPHABET_SIZE ) - throw new StingException("Wrong number of occurrence data points; expected " + BytePackedOutputStream.ALPHABET_SIZE); - ByteBuffer buffer = ByteBuffer.allocate(INT_SIZE_IN_BYTES*occurrences.length).order(ByteOrder.LITTLE_ENDIAN); - for(int occurrence: occurrences) - buffer.putInt(occurrence); - targetOutputStream.write(buffer.array()); + public void write( int datum ) throws IOException { + ByteBuffer buffer = ByteBuffer.allocate(INT_SIZE_IN_BYTES).order(ByteOrder.LITTLE_ENDIAN); + buffer.putInt(datum); + targetOutputStream.write(buffer.array()); + } + + /** + * Write the data to the output stream. + * @param data data to write; each element is written in array order. + * @throws IOException if an I/O error occurs. 
+ */ + public void write( int[] data ) throws IOException { + for(int datum: data) + write(datum); + } + + public void write( int[] data, int offset, int length ) throws IOException { + for( int i = offset; i < offset+length; i++ ) + write(data[i]); } /**