Breaking the signed int glass ceiling; stage 1: convert critical ints to longs. Code cleanup and documentation.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1852 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
79993be46c
commit
ce38fa7c81
|
|
@ -17,19 +17,11 @@ public interface Alignment extends Comparable<Alignment> {
|
|||
* Gets the starting position for the given alignment.
|
||||
* @return Starting position.
|
||||
*/
|
||||
public int getAlignmentStart();
|
||||
public long getAlignmentStart();
|
||||
|
||||
/**
|
||||
* Gets the score of this alignment.
|
||||
* @return The score.
|
||||
*/
|
||||
public int getScore();
|
||||
|
||||
/**
|
||||
* Temporary getters.
|
||||
* @return
|
||||
*/
|
||||
public int getMismatches();
|
||||
public int getGapOpens();
|
||||
public int getGapExtensions();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -124,7 +124,7 @@ public class BWAAligner implements Aligner {
|
|||
|
||||
// Found a valid alignment; store it and move on.
|
||||
if(alignment.position >= read.getReadLength()-1) {
|
||||
for( int bwtIndex = alignment.loBound; bwtIndex <= alignment.hiBound; bwtIndex++ ) {
|
||||
for(long bwtIndex = alignment.loBound; bwtIndex <= alignment.hiBound; bwtIndex++) {
|
||||
BWAAlignment finalAlignment = alignment.clone();
|
||||
|
||||
if( finalAlignment.isNegativeStrand() )
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ public class BWAAlignment implements Alignment, Cloneable {
|
|||
/**
|
||||
* Start of the final alignment.
|
||||
*/
|
||||
protected int alignmentStart;
|
||||
protected long alignmentStart;
|
||||
|
||||
/**
|
||||
* Is this match being treated as a negative or positive strand?
|
||||
|
|
@ -65,12 +65,12 @@ public class BWAAlignment implements Alignment, Cloneable {
|
|||
/**
|
||||
* Working variable. The lower bound of the alignment within the BWT.
|
||||
*/
|
||||
protected int loBound;
|
||||
protected long loBound;
|
||||
|
||||
/**
|
||||
* Working variable. The upper bound of the alignment within the BWT.
|
||||
*/
|
||||
protected int hiBound;
|
||||
protected long hiBound;
|
||||
|
||||
/**
|
||||
* Cache the score.
|
||||
|
|
@ -81,7 +81,7 @@ public class BWAAlignment implements Alignment, Cloneable {
|
|||
* Gets the starting position for the given alignment.
|
||||
* @return Starting position.
|
||||
*/
|
||||
public int getAlignmentStart() {
|
||||
public long getAlignmentStart() {
|
||||
return alignmentStart;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@ package org.broadinstitute.sting.alignment.bwa;
|
|||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.broadinstitute.sting.alignment.bwa.bwt.Bases;
|
||||
import org.broadinstitute.sting.alignment.bwa.bwt.BWT;
|
||||
|
||||
/**
|
||||
|
|
@ -17,17 +16,17 @@ public class LowerBound {
|
|||
/**
|
||||
* Lower bound of the suffix array.
|
||||
*/
|
||||
public final int loIndex;
|
||||
public final long loIndex;
|
||||
|
||||
/**
|
||||
* Upper bound of the suffix array.
|
||||
*/
|
||||
public final int hiIndex;
|
||||
public final long hiIndex;
|
||||
|
||||
/**
|
||||
* Width of the bwt from loIndex -> hiIndex, inclusive.
|
||||
*/
|
||||
public final int width;
|
||||
public final long width;
|
||||
|
||||
/**
|
||||
* The lower bound at the given point.
|
||||
|
|
@ -36,9 +35,11 @@ public class LowerBound {
|
|||
|
||||
/**
|
||||
* Create a new lower bound with the given value.
|
||||
* @param loIndex The lower bound of the BWT.
|
||||
* @param hiIndex The upper bound of the BWT.
|
||||
* @param value Value for the lower bound at this site.
|
||||
*/
|
||||
private LowerBound(int loIndex, int hiIndex, int value) {
|
||||
private LowerBound(long loIndex, long hiIndex, int value) {
|
||||
this.loIndex = loIndex;
|
||||
this.hiIndex = hiIndex;
|
||||
this.width = hiIndex - loIndex + 1;
|
||||
|
|
@ -47,11 +48,16 @@ public class LowerBound {
|
|||
|
||||
/**
|
||||
* Create a non-optimal bound according to the algorithm specified in Figure 3 of the BWA paper.
|
||||
* @param bases Bases of the read to use when creating a new BWT.
|
||||
* @param bwt BWT to check against.
|
||||
* @return A list of lower bounds at every point in the reference.
|
||||
*
|
||||
*/
|
||||
public static List<LowerBound> create( Byte[] bases, BWT bwt ) {
|
||||
public static List<LowerBound> create(Byte[] bases, BWT bwt) {
|
||||
List<LowerBound> bounds = new ArrayList<LowerBound>();
|
||||
|
||||
int loIndex = 0, hiIndex = bwt.length(), mismatches = 0;
|
||||
long loIndex = 0, hiIndex = bwt.length();
|
||||
int mismatches = 0;
|
||||
for( int i = bases.length-1; i >= 0; i-- ) {
|
||||
Byte base = bases[i];
|
||||
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ public class BWT {
|
|||
/**
|
||||
* The inverse SA, used as a placeholder for determining where the special EOL character sits.
|
||||
*/
|
||||
protected final int inverseSA0;
|
||||
protected final long inverseSA0;
|
||||
|
||||
/**
|
||||
* Cumulative counts for the entire BWT.
|
||||
|
|
@ -37,7 +37,7 @@ public class BWT {
|
|||
* @param counts Cumulative count of bases, in A,C,G,T order.
|
||||
* @param sequenceBlocks The full BWT sequence, sans the '$'.
|
||||
*/
|
||||
public BWT( int inverseSA0, Counts counts, SequenceBlock[] sequenceBlocks ) {
|
||||
public BWT( long inverseSA0, Counts counts, SequenceBlock[] sequenceBlocks ) {
|
||||
this.inverseSA0 = inverseSA0;
|
||||
this.counts = counts;
|
||||
this.sequenceBlocks = sequenceBlocks;
|
||||
|
|
@ -49,7 +49,7 @@ public class BWT {
|
|||
* @param counts Count of bases, in A,C,G,T order.
|
||||
* @param sequence The full BWT sequence, sans the '$'.
|
||||
*/
|
||||
public BWT( int inverseSA0, Counts counts, byte[] sequence ) {
|
||||
public BWT( long inverseSA0, Counts counts, byte[] sequence ) {
|
||||
this(inverseSA0,counts,generateSequenceBlocks(sequence));
|
||||
}
|
||||
|
||||
|
|
@ -58,7 +58,7 @@ public class BWT {
|
|||
* @return The full BWT string as a byte array.
|
||||
*/
|
||||
public byte[] getSequence() {
|
||||
byte[] sequence = new byte[counts.getTotal()];
|
||||
byte[] sequence = new byte[(int)counts.getTotal()];
|
||||
for( SequenceBlock block: sequenceBlocks )
|
||||
System.arraycopy(block.sequence,0,sequence,block.sequenceStart,block.sequenceLength);
|
||||
return sequence;
|
||||
|
|
@ -69,7 +69,7 @@ public class BWT {
|
|||
* @param base The base.
|
||||
* @return Total counts for all bases lexicographically smaller than this base.
|
||||
*/
|
||||
public int counts(byte base) {
|
||||
public long counts(byte base) {
|
||||
return counts.getCumulative(base);
|
||||
}
|
||||
|
||||
|
|
@ -79,10 +79,10 @@ public class BWT {
|
|||
* @param index The position to search within the BWT.
|
||||
* @return Total counts for all bases lexicographically smaller than this base.
|
||||
*/
|
||||
public int occurrences(byte base,int index) {
|
||||
public long occurrences(byte base,long index) {
|
||||
SequenceBlock block = getSequenceBlock(index);
|
||||
int position = getSequencePosition(index);
|
||||
int accumulator = block.occurrences.get(base);
|
||||
long accumulator = block.occurrences.get(base);
|
||||
for(int i = 0; i <= position; i++) {
|
||||
if(base == block.sequence[i])
|
||||
accumulator++;
|
||||
|
|
@ -94,7 +94,7 @@ public class BWT {
|
|||
* The number of bases in the BWT as a whole.
|
||||
* @return Number of bases.
|
||||
*/
|
||||
public int length() {
|
||||
public long length() {
|
||||
return counts.getTotal();
|
||||
}
|
||||
|
||||
|
|
@ -103,7 +103,7 @@ public class BWT {
|
|||
* @param index The index to use.
|
||||
* @return The base at that location.
|
||||
*/
|
||||
protected byte getBase(int index) {
|
||||
protected byte getBase(long index) {
|
||||
if(index == inverseSA0)
|
||||
throw new StingException(String.format("Base at index %d does not have a text representation",index));
|
||||
|
||||
|
|
@ -112,16 +112,16 @@ public class BWT {
|
|||
return block.sequence[position];
|
||||
}
|
||||
|
||||
private SequenceBlock getSequenceBlock(int index) {
|
||||
private SequenceBlock getSequenceBlock(long index) {
|
||||
// If the index is above the SA-1[0], remap it to the appropriate coordinate space.
|
||||
if(index > inverseSA0) index--;
|
||||
return sequenceBlocks[index/SEQUENCE_BLOCK_SIZE];
|
||||
return sequenceBlocks[(int)(index/SEQUENCE_BLOCK_SIZE)];
|
||||
}
|
||||
|
||||
private int getSequencePosition(int index) {
|
||||
private int getSequencePosition(long index) {
|
||||
// If the index is above the SA-1[0], remap it to the appropriate coordinate space.
|
||||
if(index > inverseSA0) index--;
|
||||
return index%SEQUENCE_BLOCK_SIZE;
|
||||
return (int)(index%SEQUENCE_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
package org.broadinstitute.sting.alignment.bwa.bwt;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.alignment.bwa.packing.IntPackedInputStream;
|
||||
import org.broadinstitute.sting.alignment.bwa.packing.UnsignedIntPackedInputStream;
|
||||
import org.broadinstitute.sting.alignment.bwa.packing.BasePackedInputStream;
|
||||
import org.broadinstitute.sting.alignment.bwa.packing.PackUtils;
|
||||
|
||||
|
|
@ -37,29 +37,29 @@ public class BWTReader {
|
|||
* @return The BWT stored in the input stream.
|
||||
*/
|
||||
public BWT read() {
|
||||
IntPackedInputStream intPackedInputStream = new IntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN);
|
||||
UnsignedIntPackedInputStream uintPackedInputStream = new UnsignedIntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN);
|
||||
BasePackedInputStream basePackedInputStream = new BasePackedInputStream<Integer>(Integer.class, inputStream, ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
int inverseSA0;
|
||||
int[] count;
|
||||
long inverseSA0;
|
||||
long[] count;
|
||||
SequenceBlock[] sequenceBlocks;
|
||||
|
||||
try {
|
||||
inverseSA0 = intPackedInputStream.read();
|
||||
count = new int[PackUtils.ALPHABET_SIZE];
|
||||
intPackedInputStream.read(count);
|
||||
inverseSA0 = uintPackedInputStream.read();
|
||||
count = new long[PackUtils.ALPHABET_SIZE];
|
||||
uintPackedInputStream.read(count);
|
||||
|
||||
int bwtSize = count[PackUtils.ALPHABET_SIZE-1];
|
||||
long bwtSize = count[PackUtils.ALPHABET_SIZE-1];
|
||||
sequenceBlocks = new SequenceBlock[PackUtils.numberOfPartitions(bwtSize,BWT.SEQUENCE_BLOCK_SIZE)];
|
||||
|
||||
for( int block = 0; block < sequenceBlocks.length; block++ ) {
|
||||
int sequenceStart = block* BWT.SEQUENCE_BLOCK_SIZE;
|
||||
int sequenceLength = Math.min(BWT.SEQUENCE_BLOCK_SIZE,bwtSize-sequenceStart);
|
||||
int sequenceLength = (int)Math.min(BWT.SEQUENCE_BLOCK_SIZE,bwtSize-sequenceStart);
|
||||
|
||||
int[] occurrences = new int[PackUtils.ALPHABET_SIZE];
|
||||
long[] occurrences = new long[PackUtils.ALPHABET_SIZE];
|
||||
byte[] bwt = new byte[sequenceLength];
|
||||
|
||||
intPackedInputStream.read(occurrences);
|
||||
uintPackedInputStream.read(occurrences);
|
||||
basePackedInputStream.read(bwt);
|
||||
|
||||
sequenceBlocks[block] = new SequenceBlock(sequenceStart,sequenceLength,new Counts(occurrences,false),bwt);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
package org.broadinstitute.sting.alignment.bwa.bwt;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.alignment.bwa.packing.IntPackedOutputStream;
|
||||
import org.broadinstitute.sting.alignment.bwa.packing.UnsignedIntPackedOutputStream;
|
||||
import org.broadinstitute.sting.alignment.bwa.packing.BasePackedOutputStream;
|
||||
|
||||
import java.io.*;
|
||||
|
|
@ -37,7 +37,7 @@ public class BWTWriter {
|
|||
* @param bwt Transform to be written to the output stream.
|
||||
*/
|
||||
public void write( BWT bwt ) {
|
||||
IntPackedOutputStream intPackedOutputStream = new IntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN);
|
||||
UnsignedIntPackedOutputStream intPackedOutputStream = new UnsignedIntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN);
|
||||
BasePackedOutputStream basePackedOutputStream = new BasePackedOutputStream<Integer>(Integer.class, outputStream, ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -15,12 +15,12 @@ public class Counts implements Cloneable {
|
|||
/**
|
||||
* Internal representation of counts, broken down by ASCII value.
|
||||
*/
|
||||
private Map<Byte,Integer> counts = new HashMap<Byte,Integer>();
|
||||
private Map<Byte,Long> counts = new HashMap<Byte,Long>();
|
||||
|
||||
/**
|
||||
* Internal representation of cumulative counts, broken down by ASCII value.
|
||||
*/
|
||||
private Map<Byte,Integer> cumulativeCounts = new HashMap<Byte,Integer>();
|
||||
private Map<Byte,Long> cumulativeCounts = new HashMap<Byte,Long>();
|
||||
|
||||
/**
|
||||
* Create an empty Counts object with values A=0,C=0,G=0,T=0.
|
||||
|
|
@ -28,8 +28,8 @@ public class Counts implements Cloneable {
|
|||
public Counts()
|
||||
{
|
||||
for(byte base: Bases.instance) {
|
||||
counts.put(base,0);
|
||||
cumulativeCounts.put(base,0);
|
||||
counts.put(base,0L);
|
||||
cumulativeCounts.put(base,0L);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -38,20 +38,20 @@ public class Counts implements Cloneable {
|
|||
* @param data Count data, broken down by base.
|
||||
* @param cumulative Whether the counts are cumulative, (count_G=numA+numC+numG,for example).
|
||||
*/
|
||||
public Counts( int[] data, boolean cumulative ) {
|
||||
public Counts( long[] data, boolean cumulative ) {
|
||||
if(cumulative) {
|
||||
int priorCount = 0;
|
||||
long priorCount = 0;
|
||||
for(byte base: Bases.instance) {
|
||||
int count = data[Bases.toPack(base)];
|
||||
long count = data[Bases.toPack(base)];
|
||||
counts.put(base,count-priorCount);
|
||||
cumulativeCounts.put(base,priorCount);
|
||||
priorCount = count;
|
||||
}
|
||||
}
|
||||
else {
|
||||
int priorCount = 0;
|
||||
long priorCount = 0;
|
||||
for(byte base: Bases.instance) {
|
||||
int count = data[Bases.toPack(base)];
|
||||
long count = data[Bases.toPack(base)];
|
||||
counts.put(base,count);
|
||||
cumulativeCounts.put(base,priorCount);
|
||||
priorCount += count;
|
||||
|
|
@ -64,8 +64,8 @@ public class Counts implements Cloneable {
|
|||
* @param cumulative Use a cumulative representation.
|
||||
* @return Array of count values.
|
||||
*/
|
||||
public int[] toArray(boolean cumulative) {
|
||||
int[] countArray = new int[counts.size()];
|
||||
public long[] toArray(boolean cumulative) {
|
||||
long[] countArray = new long[counts.size()];
|
||||
if(cumulative) {
|
||||
int index = 0;
|
||||
boolean first = true;
|
||||
|
|
@ -98,8 +98,8 @@ public class Counts implements Cloneable {
|
|||
catch(CloneNotSupportedException ex) {
|
||||
throw new StingException("Unable to clone counts object", ex);
|
||||
}
|
||||
other.counts = new HashMap<Byte,Integer>(counts);
|
||||
other.cumulativeCounts = new HashMap<Byte,Integer>(cumulativeCounts);
|
||||
other.counts = new HashMap<Byte,Long>(counts);
|
||||
other.cumulativeCounts = new HashMap<Byte,Long>(cumulativeCounts);
|
||||
return other;
|
||||
}
|
||||
|
||||
|
|
@ -123,7 +123,7 @@ public class Counts implements Cloneable {
|
|||
* @param base Base for which to query counts.
|
||||
* @return Number of bases of this type seen.
|
||||
*/
|
||||
public int get(byte base) {
|
||||
public long get(byte base) {
|
||||
return counts.get(base);
|
||||
}
|
||||
|
||||
|
|
@ -133,7 +133,7 @@ public class Counts implements Cloneable {
|
|||
* @param base Base for which to query counts.
|
||||
* @return Number of bases of this type seen.
|
||||
*/
|
||||
public int getCumulative(byte base) {
|
||||
public long getCumulative(byte base) {
|
||||
return cumulativeCounts.get(base);
|
||||
}
|
||||
|
||||
|
|
@ -141,7 +141,7 @@ public class Counts implements Cloneable {
|
|||
* How many total bases are represented by this count structure?
|
||||
* @return Total bases represented.
|
||||
*/
|
||||
public int getTotal() {
|
||||
public long getTotal() {
|
||||
int accumulator = 0;
|
||||
for(byte base: Bases.instance) {
|
||||
accumulator += get(base);
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ public class CreateBWTFromReference {
|
|||
return occurrences;
|
||||
}
|
||||
|
||||
private int[] createSuffixArray( String sequence ) {
|
||||
private long[] createSuffixArray( String sequence ) {
|
||||
TreeSet<Integer> suffixArrayBuilder = new TreeSet<Integer>( new SuffixArrayComparator(sequence) );
|
||||
|
||||
// Build out the suffix array using a custom comparator.
|
||||
|
|
@ -77,7 +77,7 @@ public class CreateBWTFromReference {
|
|||
}
|
||||
|
||||
// Copy the suffix array into a long array.
|
||||
int[] suffixArray = new int[suffixArrayBuilder.size()];
|
||||
long[] suffixArray = new long[suffixArrayBuilder.size()];
|
||||
int i = 0;
|
||||
for( Integer element: suffixArrayBuilder )
|
||||
suffixArray[i++] = element;
|
||||
|
|
@ -85,35 +85,35 @@ public class CreateBWTFromReference {
|
|||
return suffixArray;
|
||||
}
|
||||
|
||||
private int[] invertSuffixArray( int[] suffixArray ) {
|
||||
int[] inverseSuffixArray = new int[suffixArray.length];
|
||||
private long[] invertSuffixArray( long[] suffixArray ) {
|
||||
long[] inverseSuffixArray = new long[suffixArray.length];
|
||||
for( int i = 0; i < suffixArray.length; i++ )
|
||||
inverseSuffixArray[suffixArray[i]] = i;
|
||||
inverseSuffixArray[(int)suffixArray[i]] = i;
|
||||
return inverseSuffixArray;
|
||||
}
|
||||
|
||||
private int[] createCompressedSuffixArray( int[] suffixArray, int[] inverseSuffixArray ) {
|
||||
int[] compressedSuffixArray = new int[suffixArray.length];
|
||||
private long[] createCompressedSuffixArray( int[] suffixArray, int[] inverseSuffixArray ) {
|
||||
long[] compressedSuffixArray = new long[suffixArray.length];
|
||||
compressedSuffixArray[0] = inverseSuffixArray[0];
|
||||
for( int i = 1; i < suffixArray.length; i++ )
|
||||
compressedSuffixArray[i] = inverseSuffixArray[suffixArray[i]+1];
|
||||
return compressedSuffixArray;
|
||||
}
|
||||
|
||||
private int[] createInversedCompressedSuffixArray( int[] compressedSuffixArray ) {
|
||||
int[] inverseCompressedSuffixArray = new int[compressedSuffixArray.length];
|
||||
private long[] createInversedCompressedSuffixArray( int[] compressedSuffixArray ) {
|
||||
long[] inverseCompressedSuffixArray = new long[compressedSuffixArray.length];
|
||||
for( int i = 0; i < compressedSuffixArray.length; i++ )
|
||||
inverseCompressedSuffixArray[compressedSuffixArray[i]] = i;
|
||||
return inverseCompressedSuffixArray;
|
||||
}
|
||||
|
||||
private byte[] createBWT( String sequence, int[] suffixArray ) {
|
||||
private byte[] createBWT( String sequence, long[] suffixArray ) {
|
||||
byte[] bwt = new byte[suffixArray.length-1];
|
||||
int i = 0;
|
||||
for( int suffixArrayEntry: suffixArray ) {
|
||||
for( long suffixArrayEntry: suffixArray ) {
|
||||
if( suffixArrayEntry == 0 )
|
||||
continue;
|
||||
bwt[i++] = (byte)sequence.charAt(suffixArrayEntry-1);
|
||||
bwt[i++] = (byte)sequence.charAt((int)suffixArrayEntry-1);
|
||||
}
|
||||
return bwt;
|
||||
}
|
||||
|
|
@ -152,12 +152,12 @@ public class CreateBWTFromReference {
|
|||
occurrences.getCumulative(Bases.T));
|
||||
|
||||
// Generate the suffix array and print diagnostics.
|
||||
int[] suffixArrayData = creator.createSuffixArray(sequence);
|
||||
int[] reverseSuffixArrayData = creator.createSuffixArray(reverseSequence);
|
||||
long[] suffixArrayData = creator.createSuffixArray(sequence);
|
||||
long[] reverseSuffixArrayData = creator.createSuffixArray(reverseSequence);
|
||||
|
||||
// Invert the suffix array and print diagnostics.
|
||||
int[] inverseSuffixArray = creator.invertSuffixArray(suffixArrayData);
|
||||
int[] reverseInverseSuffixArray = creator.invertSuffixArray(reverseSuffixArrayData);
|
||||
long[] inverseSuffixArray = creator.invertSuffixArray(suffixArrayData);
|
||||
long[] reverseInverseSuffixArray = creator.invertSuffixArray(reverseSuffixArrayData);
|
||||
|
||||
SuffixArray suffixArray = new SuffixArray( inverseSuffixArray[0], occurrences, suffixArrayData );
|
||||
SuffixArray reverseSuffixArray = new SuffixArray( reverseInverseSuffixArray[0], occurrences, reverseSuffixArrayData );
|
||||
|
|
@ -186,7 +186,7 @@ public class CreateBWTFromReference {
|
|||
*/
|
||||
|
||||
// Create the BWT.
|
||||
BWT bwt = new BWT( inverseSuffixArray[0], occurrences, creator.createBWT(sequence, suffixArray.sequence) );
|
||||
BWT bwt = new BWT(inverseSuffixArray[0], occurrences, creator.createBWT(sequence, suffixArray.sequence));
|
||||
BWT reverseBWT = new BWT( reverseInverseSuffixArray[0], occurrences, creator.createBWT(reverseSequence, reverseSuffixArray.sequence));
|
||||
|
||||
byte[] bwtSequence = bwt.getSequence();
|
||||
|
|
|
|||
|
|
@ -9,13 +9,13 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
* @version 0.1
|
||||
*/
|
||||
public class SuffixArray {
|
||||
public final int inverseSA0;
|
||||
public final long inverseSA0;
|
||||
public final Counts occurrences;
|
||||
|
||||
/**
|
||||
* The elements of the sequence actually stored in memory.
|
||||
*/
|
||||
protected final int[] sequence;
|
||||
protected final long[] sequence;
|
||||
|
||||
/**
|
||||
* How often are individual elements in the sequence actually stored
|
||||
|
|
@ -28,7 +28,7 @@ public class SuffixArray {
|
|||
*/
|
||||
protected final BWT bwt;
|
||||
|
||||
public SuffixArray(int inverseSA0, Counts occurrences, int[] sequence) {
|
||||
public SuffixArray(long inverseSA0, Counts occurrences, long[] sequence) {
|
||||
this(inverseSA0,occurrences,sequence,1,null);
|
||||
}
|
||||
|
||||
|
|
@ -37,8 +37,10 @@ public class SuffixArray {
|
|||
* @param inverseSA0 Inverse SA entry for the first element.
|
||||
* @param occurrences Cumulative number of occurrences of A,C,G,T, in order.
|
||||
* @param sequence The full suffix array.
|
||||
* @param sequenceInterval How frequently is the sequence interval stored.
|
||||
* @param bwt bwt used to infer the remaining entries in the BWT.
|
||||
*/
|
||||
public SuffixArray(int inverseSA0, Counts occurrences, int[] sequence, int sequenceInterval, BWT bwt) {
|
||||
public SuffixArray(long inverseSA0, Counts occurrences, long[] sequence, int sequenceInterval, BWT bwt) {
|
||||
this.inverseSA0 = inverseSA0;
|
||||
this.occurrences = occurrences;
|
||||
this.sequence = sequence;
|
||||
|
|
@ -53,7 +55,7 @@ public class SuffixArray {
|
|||
* Retrieves the length of the sequence array.
|
||||
* @return Length of the suffix array.
|
||||
*/
|
||||
public int length() {
|
||||
public long length() {
|
||||
if( bwt != null )
|
||||
return bwt.length()+1;
|
||||
else
|
||||
|
|
@ -65,7 +67,7 @@ public class SuffixArray {
|
|||
* @param index Index at which to retrieve the suffix array value.
|
||||
* @return The suffix array value at that entry.
|
||||
*/
|
||||
public int get(int index) {
|
||||
public long get(long index) {
|
||||
int iterations = 0;
|
||||
while(index%sequenceInterval != 0) {
|
||||
// The inverseSA0 ('$') doesn't have a usable ASCII representation; it must be treated as a special case.
|
||||
|
|
@ -77,6 +79,6 @@ public class SuffixArray {
|
|||
}
|
||||
iterations++;
|
||||
}
|
||||
return (sequence[index/sequenceInterval]+iterations) % length();
|
||||
return (sequence[(int)(index/sequenceInterval)]+iterations) % length();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
package org.broadinstitute.sting.alignment.bwa.bwt;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.alignment.bwa.packing.IntPackedInputStream;
|
||||
import org.broadinstitute.sting.alignment.bwa.packing.UnsignedIntPackedInputStream;
|
||||
import org.broadinstitute.sting.alignment.bwa.packing.PackUtils;
|
||||
|
||||
import java.io.*;
|
||||
|
|
@ -44,21 +44,21 @@ public class SuffixArrayReader {
|
|||
* @return The suffix array stored in the input stream.
|
||||
*/
|
||||
public SuffixArray read() {
|
||||
IntPackedInputStream intPackedInputStream = new IntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN);
|
||||
UnsignedIntPackedInputStream uintPackedInputStream = new UnsignedIntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
int inverseSA0;
|
||||
int[] occurrences;
|
||||
int[] suffixArray;
|
||||
long inverseSA0;
|
||||
long[] occurrences;
|
||||
long[] suffixArray;
|
||||
int suffixArrayInterval;
|
||||
|
||||
try {
|
||||
inverseSA0 = intPackedInputStream.read();
|
||||
occurrences = new int[PackUtils.ALPHABET_SIZE];
|
||||
intPackedInputStream.read(occurrences);
|
||||
inverseSA0 = uintPackedInputStream.read();
|
||||
occurrences = new long[PackUtils.ALPHABET_SIZE];
|
||||
uintPackedInputStream.read(occurrences);
|
||||
// Throw away the suffix array size in bytes and use the occurrences table directly.
|
||||
suffixArrayInterval = intPackedInputStream.read();
|
||||
suffixArray = new int[(occurrences[occurrences.length-1]+suffixArrayInterval-1)/suffixArrayInterval];
|
||||
intPackedInputStream.read(suffixArray);
|
||||
suffixArrayInterval = (int)uintPackedInputStream.read();
|
||||
suffixArray = new long[(int)((occurrences[occurrences.length-1]+suffixArrayInterval-1)/suffixArrayInterval)];
|
||||
uintPackedInputStream.read(suffixArray);
|
||||
}
|
||||
catch( IOException ex ) {
|
||||
throw new StingException("Unable to read BWT from input stream.", ex);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
package org.broadinstitute.sting.alignment.bwa.bwt;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.alignment.bwa.packing.IntPackedOutputStream;
|
||||
import org.broadinstitute.sting.alignment.bwa.packing.UnsignedIntPackedOutputStream;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.ByteOrder;
|
||||
|
|
@ -36,16 +36,16 @@ public class SuffixArrayWriter {
|
|||
* @param suffixArray suffix array to write.
|
||||
*/
|
||||
public void write(SuffixArray suffixArray) {
|
||||
IntPackedOutputStream intPackedOutputStream = new IntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN);
|
||||
UnsignedIntPackedOutputStream uintPackedOutputStream = new UnsignedIntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
try {
|
||||
intPackedOutputStream.write(suffixArray.inverseSA0);
|
||||
intPackedOutputStream.write(suffixArray.occurrences.toArray(true));
|
||||
uintPackedOutputStream.write(suffixArray.inverseSA0);
|
||||
uintPackedOutputStream.write(suffixArray.occurrences.toArray(true));
|
||||
// How frequently the suffix array entry is placed.
|
||||
intPackedOutputStream.write(1);
|
||||
uintPackedOutputStream.write(1);
|
||||
// Length of the suffix array.
|
||||
intPackedOutputStream.write(suffixArray.length()-1);
|
||||
intPackedOutputStream.write(suffixArray.sequence, 1, suffixArray.length()-1);
|
||||
uintPackedOutputStream.write(suffixArray.length()-1);
|
||||
uintPackedOutputStream.write(suffixArray.sequence,1,suffixArray.sequence.length);
|
||||
}
|
||||
catch( IOException ex ) {
|
||||
throw new StingException("Unable to read BWT from input stream.", ex);
|
||||
|
|
|
|||
|
|
@ -105,7 +105,7 @@ public class PackUtils {
|
|||
* @param partitionSize Size of an individual partition.
|
||||
* @return Number of partitions that would be created.
|
||||
*/
|
||||
public static int numberOfPartitions( int size, int partitionSize ) {
|
||||
return (size + partitionSize - 1)/partitionSize;
|
||||
public static int numberOfPartitions( long size, long partitionSize ) {
|
||||
return (int)((size+partitionSize-1) / partitionSize);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import java.nio.channels.FileChannel;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class IntPackedInputStream {
|
||||
public class UnsignedIntPackedInputStream {
|
||||
/**
|
||||
* Ultimate target for the occurrence array.
|
||||
*/
|
||||
|
|
@ -38,7 +38,7 @@ public class IntPackedInputStream {
|
|||
* @param byteOrder Endianness to use when reading a list of integers.
|
||||
* @throws java.io.IOException if an I/O error occurs.
|
||||
*/
|
||||
public IntPackedInputStream(File inputFile, ByteOrder byteOrder) throws IOException {
|
||||
public UnsignedIntPackedInputStream(File inputFile, ByteOrder byteOrder) throws IOException {
|
||||
this(new FileInputStream(inputFile),byteOrder);
|
||||
}
|
||||
|
||||
|
|
@ -47,7 +47,7 @@ public class IntPackedInputStream {
|
|||
* @param inputStream Input stream from which to read ints.
|
||||
* @param byteOrder Endianness to use when reading a list of integers.
|
||||
*/
|
||||
public IntPackedInputStream(FileInputStream inputStream, ByteOrder byteOrder) {
|
||||
public UnsignedIntPackedInputStream(FileInputStream inputStream, ByteOrder byteOrder) {
|
||||
this.targetInputStream = inputStream;
|
||||
this.targetInputChannel = inputStream.getChannel();
|
||||
this.byteOrder = byteOrder;
|
||||
|
|
@ -58,8 +58,8 @@ public class IntPackedInputStream {
|
|||
* @return The next input datum in the stream.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public int read() throws IOException {
|
||||
int[] data = new int[1];
|
||||
public long read() throws IOException {
|
||||
long[] data = new long[1];
|
||||
read(data);
|
||||
return data[0];
|
||||
}
|
||||
|
|
@ -69,7 +69,7 @@ public class IntPackedInputStream {
|
|||
* @param data placeholder for input data.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void read( int[] data ) throws IOException {
|
||||
public void read( long[] data ) throws IOException {
|
||||
read( data, 0, data.length );
|
||||
}
|
||||
|
||||
|
|
@ -80,7 +80,7 @@ public class IntPackedInputStream {
|
|||
* @param length number of ints to read in.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void read( int[] data, int offset, int length ) throws IOException {
|
||||
public void read( long[] data, int offset, int length ) throws IOException {
|
||||
ByteBuffer readBuffer = ByteBuffer.allocate(bytesPerInteger*length).order(byteOrder);
|
||||
|
||||
targetInputChannel.read(readBuffer,targetInputChannel.position());
|
||||
|
|
@ -35,7 +35,7 @@ import java.nio.ByteOrder;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class IntPackedOutputStream {
|
||||
public class UnsignedIntPackedOutputStream {
|
||||
/**
|
||||
* Ultimate target for the occurrence array.
|
||||
*/
|
||||
|
|
@ -52,7 +52,7 @@ public class IntPackedOutputStream {
|
|||
* @param byteOrder Endianness to use when writing a list of integers.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public IntPackedOutputStream(File outputFile, ByteOrder byteOrder) throws IOException {
|
||||
public UnsignedIntPackedOutputStream(File outputFile, ByteOrder byteOrder) throws IOException {
|
||||
this(new FileOutputStream(outputFile),byteOrder);
|
||||
}
|
||||
|
||||
|
|
@ -60,9 +60,8 @@ public class IntPackedOutputStream {
|
|||
* Write packed ints to the given OutputStream.
|
||||
* @param outputStream Output stream to which to write packed ints.
|
||||
* @param byteOrder Endianness to use when writing a list of integers.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public IntPackedOutputStream(OutputStream outputStream, ByteOrder byteOrder) {
|
||||
public UnsignedIntPackedOutputStream(OutputStream outputStream, ByteOrder byteOrder) {
|
||||
this.targetOutputStream = outputStream;
|
||||
buffer = ByteBuffer.allocate(PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE).order(byteOrder);
|
||||
}
|
||||
|
|
@ -72,9 +71,9 @@ public class IntPackedOutputStream {
|
|||
* @param datum datum to write.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void write( int datum ) throws IOException {
|
||||
public void write( long datum ) throws IOException {
|
||||
buffer.rewind();
|
||||
buffer.putInt(datum);
|
||||
buffer.putInt((int)datum);
|
||||
targetOutputStream.write(buffer.array());
|
||||
}
|
||||
|
||||
|
|
@ -83,8 +82,8 @@ public class IntPackedOutputStream {
|
|||
* @param data data to write. occurrences.length must match alphabet size.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void write( int[] data ) throws IOException {
|
||||
for(int datum: data)
|
||||
public void write( long[] data ) throws IOException {
|
||||
for(long datum: data)
|
||||
write(datum);
|
||||
}
|
||||
|
||||
|
|
@ -95,7 +94,7 @@ public class IntPackedOutputStream {
|
|||
* @param length number of ints to write.
|
||||
* @throws IOException if an I/O error occurs.
|
||||
*/
|
||||
public void write( int[] data, int offset, int length ) throws IOException {
|
||||
public void write( long[] data, int offset, int length ) throws IOException {
|
||||
for( int i = offset; i < offset+length; i++ )
|
||||
write(data[i]);
|
||||
}
|
||||
Loading…
Reference in New Issue