My reference implementation is now generating a BWT which matches BWT-SW's.
Note to self: never give project status in an svn log. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1550 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f22f590192
commit
adce3bd536
|
|
@ -139,4 +139,19 @@ public class BytePackedOutputStream {
|
|||
}
|
||||
}
|
||||
|
||||
public static byte decodePackedRepresentation(byte pack) {
|
||||
switch( pack ) {
|
||||
case 0:
|
||||
return 'A';
|
||||
case 1:
|
||||
return 'C';
|
||||
case 2:
|
||||
return 'G';
|
||||
case 3:
|
||||
return 'T';
|
||||
default:
|
||||
throw new StingException("Unknown pack type: " + pack);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import net.sf.samtools.util.StringUtil;
|
|||
import java.io.*;
|
||||
import java.util.TreeSet;
|
||||
import java.util.Comparator;
|
||||
import java.util.Arrays;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
|
|
@ -107,12 +108,12 @@ public class CreateBWTFromReference {
|
|||
}
|
||||
|
||||
private byte[] createBWT( String sequence, int[] suffixArray ) {
|
||||
byte[] bwt = new byte[suffixArray.length];
|
||||
for( int i = 0; i < suffixArray.length; i++ ) {
|
||||
// Find the first character after the current character in the rotation. If the character is past the end
|
||||
// (in other words, '$'), back up to the previous character.
|
||||
int sequenceEnd = Math.min((suffixArray[i]+suffixArray.length-1)%suffixArray.length, sequence.length()-1 );
|
||||
bwt[i] = (byte)sequence.charAt(sequenceEnd);
|
||||
byte[] bwt = new byte[suffixArray.length-1];
|
||||
int i = 0;
|
||||
for( int suffixArrayEntry: suffixArray ) {
|
||||
if( suffixArrayEntry == 0 )
|
||||
continue;
|
||||
bwt[i++] = (byte)sequence.charAt(suffixArrayEntry-1);
|
||||
}
|
||||
return bwt;
|
||||
}
|
||||
|
|
@ -168,12 +169,7 @@ public class CreateBWTFromReference {
|
|||
byte[] bwt = creator.createBWT(sequence, suffixArray);
|
||||
|
||||
String bwtAsString = new String(bwt);
|
||||
//System.out.printf("BWT:%n");
|
||||
while( bwtAsString.length() > 0 ) {
|
||||
int end = Math.min( 80, bwtAsString.length() );
|
||||
//System.out.printf("%s%n", bwtAsString.substring(0,end));
|
||||
bwtAsString = bwtAsString.substring(end);
|
||||
}
|
||||
System.out.printf("BWT: %s...%n", bwtAsString.substring(0,80));
|
||||
|
||||
OutputStream bwtOutputStream = new BufferedOutputStream(new FileOutputStream(bwtFile));
|
||||
|
||||
|
|
@ -201,6 +197,20 @@ public class CreateBWTFromReference {
|
|||
saIntWriter.write(suffixArray, 1, suffixArray.length-1);
|
||||
|
||||
saIntWriter.close();
|
||||
|
||||
File existingBwtFile = new File(inputFileName+".bwt");
|
||||
WordPackedInputStream inputStream = new WordPackedInputStream(existingBwtFile,ByteOrder.LITTLE_ENDIAN);
|
||||
byte[] existingBwt = inputStream.read();
|
||||
|
||||
String existingBwtAsString = new String(existingBwt);
|
||||
System.out.printf("Existing BWT: %s...%n",existingBwtAsString.substring(0,80));
|
||||
|
||||
for( int i = 0; i < bwt.length; i++ ) {
|
||||
if( bwt[i] != existingBwt[i] ) {
|
||||
System.out.printf("First bwt mismatch: %d%n",i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -0,0 +1,61 @@
|
|||
package org.broadinstitute.sting.bwa;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* Reads a word-packed version of the input stream.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class WordPackedInputStream {
|
||||
|
||||
/**
|
||||
* Ultimate source for packed bases.
|
||||
*/
|
||||
private final InputStream targetInputStream;
|
||||
|
||||
/**
|
||||
* A fixed-size buffer for word-packed data.
|
||||
*/
|
||||
private final ByteBuffer buffer;
|
||||
|
||||
public WordPackedInputStream( File inputFile, ByteOrder byteOrder ) throws FileNotFoundException {
|
||||
this.targetInputStream = new BufferedInputStream(new FileInputStream(inputFile));
|
||||
this.buffer = ByteBuffer.allocate(WordPackedOutputStream.BASES_PER_WORD/BytePackedOutputStream.ALPHABET_SIZE).order(byteOrder);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the entire contents of the input stream.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public byte[] read() throws IOException {
|
||||
// Skip over header info.
|
||||
for( int i = 0; i < 5; i++ ) {
|
||||
targetInputStream.read(buffer.array());
|
||||
System.out.println("Skipping over: " + buffer.getInt());
|
||||
buffer.rewind();
|
||||
}
|
||||
|
||||
List<Byte> bwtList = new ArrayList<Byte>();
|
||||
while(targetInputStream.read(buffer.array()) > 0) {
|
||||
int packedWord = buffer.getInt();
|
||||
for( int i = WordPackedOutputStream.BASES_PER_WORD-1; i >= 0; i-- ) {
|
||||
byte packedByte = (byte)((packedWord >> i*2) & 0x3);
|
||||
bwtList.add(BytePackedOutputStream.decodePackedRepresentation(packedByte));
|
||||
}
|
||||
buffer.rewind();
|
||||
}
|
||||
|
||||
byte[] bwt = new byte[bwtList.size()];
|
||||
for(int i = 0; i < bwtList.size(); i++)
|
||||
bwt[i] = bwtList.get(i);
|
||||
|
||||
return bwt;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -40,7 +40,7 @@ public class WordPackedOutputStream {
|
|||
/**
|
||||
* How many bases can be stored in the given word?
|
||||
*/
|
||||
private static final int BASES_PER_WORD = 16;
|
||||
public static final int BASES_PER_WORD = 16;
|
||||
|
||||
/**
|
||||
* Ultimate target for the packed bases.
|
||||
|
|
|
|||
Loading…
Reference in New Issue