Breaking the signed int glass ceiling; stage 1: convert critical ints to longs. Code cleanup and documentation.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1852 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-10-15 15:28:56 +00:00
parent 79993be46c
commit ce38fa7c81
15 changed files with 122 additions and 123 deletions

View File

@ -17,19 +17,11 @@ public interface Alignment extends Comparable<Alignment> {
* Gets the starting position for the given alignment. * Gets the starting position for the given alignment.
* @return Starting position. * @return Starting position.
*/ */
public int getAlignmentStart(); public long getAlignmentStart();
/** /**
* Gets the score of this alignment. * Gets the score of this alignment.
* @return The score. * @return The score.
*/ */
public int getScore(); public int getScore();
/**
* Temporary getters.
* @return
*/
public int getMismatches();
public int getGapOpens();
public int getGapExtensions();
} }

View File

@ -124,7 +124,7 @@ public class BWAAligner implements Aligner {
// Found a valid alignment; store it and move on. // Found a valid alignment; store it and move on.
if(alignment.position >= read.getReadLength()-1) { if(alignment.position >= read.getReadLength()-1) {
for( int bwtIndex = alignment.loBound; bwtIndex <= alignment.hiBound; bwtIndex++ ) { for(long bwtIndex = alignment.loBound; bwtIndex <= alignment.hiBound; bwtIndex++) {
BWAAlignment finalAlignment = alignment.clone(); BWAAlignment finalAlignment = alignment.clone();
if( finalAlignment.isNegativeStrand() ) if( finalAlignment.isNegativeStrand() )

View File

@ -30,7 +30,7 @@ public class BWAAlignment implements Alignment, Cloneable {
/** /**
* Start of the final alignment. * Start of the final alignment.
*/ */
protected int alignmentStart; protected long alignmentStart;
/** /**
* Is this match being treated as a negative or positive strand? * Is this match being treated as a negative or positive strand?
@ -65,12 +65,12 @@ public class BWAAlignment implements Alignment, Cloneable {
/** /**
* Working variable. The lower bound of the alignment within the BWT. * Working variable. The lower bound of the alignment within the BWT.
*/ */
protected int loBound; protected long loBound;
/** /**
* Working variable. The upper bound of the alignment within the BWT. * Working variable. The upper bound of the alignment within the BWT.
*/ */
protected int hiBound; protected long hiBound;
/** /**
* Cache the score. * Cache the score.
@ -81,7 +81,7 @@ public class BWAAlignment implements Alignment, Cloneable {
* Gets the starting position for the given alignment. * Gets the starting position for the given alignment.
* @return Starting position. * @return Starting position.
*/ */
public int getAlignmentStart() { public long getAlignmentStart() {
return alignmentStart; return alignmentStart;
} }

View File

@ -3,7 +3,6 @@ package org.broadinstitute.sting.alignment.bwa;
import java.util.List; import java.util.List;
import java.util.ArrayList; import java.util.ArrayList;
import org.broadinstitute.sting.alignment.bwa.bwt.Bases;
import org.broadinstitute.sting.alignment.bwa.bwt.BWT; import org.broadinstitute.sting.alignment.bwa.bwt.BWT;
/** /**
@ -17,17 +16,17 @@ public class LowerBound {
/** /**
* Lower bound of the suffix array. * Lower bound of the suffix array.
*/ */
public final int loIndex; public final long loIndex;
/** /**
* Upper bound of the suffix array. * Upper bound of the suffix array.
*/ */
public final int hiIndex; public final long hiIndex;
/** /**
* Width of the bwt from loIndex -> hiIndex, inclusive. * Width of the bwt from loIndex -> hiIndex, inclusive.
*/ */
public final int width; public final long width;
/** /**
* The lower bound at the given point. * The lower bound at the given point.
@ -36,9 +35,11 @@ public class LowerBound {
/** /**
* Create a new lower bound with the given value. * Create a new lower bound with the given value.
* @param loIndex The lower bound of the BWT.
* @param hiIndex The upper bound of the BWT.
* @param value Value for the lower bound at this site. * @param value Value for the lower bound at this site.
*/ */
private LowerBound(int loIndex, int hiIndex, int value) { private LowerBound(long loIndex, long hiIndex, int value) {
this.loIndex = loIndex; this.loIndex = loIndex;
this.hiIndex = hiIndex; this.hiIndex = hiIndex;
this.width = hiIndex - loIndex + 1; this.width = hiIndex - loIndex + 1;
@ -47,11 +48,16 @@ public class LowerBound {
/** /**
* Create a non-optimal bound according to the algorithm specified in Figure 3 of the BWA paper. * Create a non-optimal bound according to the algorithm specified in Figure 3 of the BWA paper.
* @param bases Bases of the read to use when creating a new BWT.
* @param bwt BWT to check against.
* @return A list of lower bounds at every point in the reference.
*
*/ */
public static List<LowerBound> create( Byte[] bases, BWT bwt ) { public static List<LowerBound> create(Byte[] bases, BWT bwt) {
List<LowerBound> bounds = new ArrayList<LowerBound>(); List<LowerBound> bounds = new ArrayList<LowerBound>();
int loIndex = 0, hiIndex = bwt.length(), mismatches = 0; long loIndex = 0, hiIndex = bwt.length();
int mismatches = 0;
for( int i = bases.length-1; i >= 0; i-- ) { for( int i = bases.length-1; i >= 0; i-- ) {
Byte base = bases[i]; Byte base = bases[i];

View File

@ -19,7 +19,7 @@ public class BWT {
/** /**
* The inverse SA, used as a placeholder for determining where the special EOL character sits. * The inverse SA, used as a placeholder for determining where the special EOL character sits.
*/ */
protected final int inverseSA0; protected final long inverseSA0;
/** /**
* Cumulative counts for the entire BWT. * Cumulative counts for the entire BWT.
@ -37,7 +37,7 @@ public class BWT {
* @param counts Cumulative count of bases, in A,C,G,T order. * @param counts Cumulative count of bases, in A,C,G,T order.
* @param sequenceBlocks The full BWT sequence, sans the '$'. * @param sequenceBlocks The full BWT sequence, sans the '$'.
*/ */
public BWT( int inverseSA0, Counts counts, SequenceBlock[] sequenceBlocks ) { public BWT( long inverseSA0, Counts counts, SequenceBlock[] sequenceBlocks ) {
this.inverseSA0 = inverseSA0; this.inverseSA0 = inverseSA0;
this.counts = counts; this.counts = counts;
this.sequenceBlocks = sequenceBlocks; this.sequenceBlocks = sequenceBlocks;
@ -49,7 +49,7 @@ public class BWT {
* @param counts Count of bases, in A,C,G,T order. * @param counts Count of bases, in A,C,G,T order.
* @param sequence The full BWT sequence, sans the '$'. * @param sequence The full BWT sequence, sans the '$'.
*/ */
public BWT( int inverseSA0, Counts counts, byte[] sequence ) { public BWT( long inverseSA0, Counts counts, byte[] sequence ) {
this(inverseSA0,counts,generateSequenceBlocks(sequence)); this(inverseSA0,counts,generateSequenceBlocks(sequence));
} }
@ -58,7 +58,7 @@ public class BWT {
* @return The full BWT string as a byte array. * @return The full BWT string as a byte array.
*/ */
public byte[] getSequence() { public byte[] getSequence() {
byte[] sequence = new byte[counts.getTotal()]; byte[] sequence = new byte[(int)counts.getTotal()];
for( SequenceBlock block: sequenceBlocks ) for( SequenceBlock block: sequenceBlocks )
System.arraycopy(block.sequence,0,sequence,block.sequenceStart,block.sequenceLength); System.arraycopy(block.sequence,0,sequence,block.sequenceStart,block.sequenceLength);
return sequence; return sequence;
@ -69,7 +69,7 @@ public class BWT {
* @param base The base. * @param base The base.
* @return Total counts for all bases lexicographically smaller than this base. * @return Total counts for all bases lexicographically smaller than this base.
*/ */
public int counts(byte base) { public long counts(byte base) {
return counts.getCumulative(base); return counts.getCumulative(base);
} }
@ -79,10 +79,10 @@ public class BWT {
* @param index The position to search within the BWT. * @param index The position to search within the BWT.
* @return Total counts for all bases lexicographically smaller than this base. * @return Total counts for all bases lexicographically smaller than this base.
*/ */
public int occurrences(byte base,int index) { public long occurrences(byte base,long index) {
SequenceBlock block = getSequenceBlock(index); SequenceBlock block = getSequenceBlock(index);
int position = getSequencePosition(index); int position = getSequencePosition(index);
int accumulator = block.occurrences.get(base); long accumulator = block.occurrences.get(base);
for(int i = 0; i <= position; i++) { for(int i = 0; i <= position; i++) {
if(base == block.sequence[i]) if(base == block.sequence[i])
accumulator++; accumulator++;
@ -94,7 +94,7 @@ public class BWT {
* The number of bases in the BWT as a whole. * The number of bases in the BWT as a whole.
* @return Number of bases. * @return Number of bases.
*/ */
public int length() { public long length() {
return counts.getTotal(); return counts.getTotal();
} }
@ -103,7 +103,7 @@ public class BWT {
* @param index The index to use. * @param index The index to use.
* @return The base at that location. * @return The base at that location.
*/ */
protected byte getBase(int index) { protected byte getBase(long index) {
if(index == inverseSA0) if(index == inverseSA0)
throw new StingException(String.format("Base at index %d does not have a text representation",index)); throw new StingException(String.format("Base at index %d does not have a text representation",index));
@ -112,16 +112,16 @@ public class BWT {
return block.sequence[position]; return block.sequence[position];
} }
private SequenceBlock getSequenceBlock(int index) { private SequenceBlock getSequenceBlock(long index) {
// If the index is above the SA-1[0], remap it to the appropriate coordinate space. // If the index is above the SA-1[0], remap it to the appropriate coordinate space.
if(index > inverseSA0) index--; if(index > inverseSA0) index--;
return sequenceBlocks[index/SEQUENCE_BLOCK_SIZE]; return sequenceBlocks[(int)(index/SEQUENCE_BLOCK_SIZE)];
} }
private int getSequencePosition(int index) { private int getSequencePosition(long index) {
// If the index is above the SA-1[0], remap it to the appropriate coordinate space. // If the index is above the SA-1[0], remap it to the appropriate coordinate space.
if(index > inverseSA0) index--; if(index > inverseSA0) index--;
return index%SEQUENCE_BLOCK_SIZE; return (int)(index%SEQUENCE_BLOCK_SIZE);
} }
/** /**

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.alignment.bwa.bwt; package org.broadinstitute.sting.alignment.bwa.bwt;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.alignment.bwa.packing.IntPackedInputStream; import org.broadinstitute.sting.alignment.bwa.packing.UnsignedIntPackedInputStream;
import org.broadinstitute.sting.alignment.bwa.packing.BasePackedInputStream; import org.broadinstitute.sting.alignment.bwa.packing.BasePackedInputStream;
import org.broadinstitute.sting.alignment.bwa.packing.PackUtils; import org.broadinstitute.sting.alignment.bwa.packing.PackUtils;
@ -37,29 +37,29 @@ public class BWTReader {
* @return The BWT stored in the input stream. * @return The BWT stored in the input stream.
*/ */
public BWT read() { public BWT read() {
IntPackedInputStream intPackedInputStream = new IntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN); UnsignedIntPackedInputStream uintPackedInputStream = new UnsignedIntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN);
BasePackedInputStream basePackedInputStream = new BasePackedInputStream<Integer>(Integer.class, inputStream, ByteOrder.LITTLE_ENDIAN); BasePackedInputStream basePackedInputStream = new BasePackedInputStream<Integer>(Integer.class, inputStream, ByteOrder.LITTLE_ENDIAN);
int inverseSA0; long inverseSA0;
int[] count; long[] count;
SequenceBlock[] sequenceBlocks; SequenceBlock[] sequenceBlocks;
try { try {
inverseSA0 = intPackedInputStream.read(); inverseSA0 = uintPackedInputStream.read();
count = new int[PackUtils.ALPHABET_SIZE]; count = new long[PackUtils.ALPHABET_SIZE];
intPackedInputStream.read(count); uintPackedInputStream.read(count);
int bwtSize = count[PackUtils.ALPHABET_SIZE-1]; long bwtSize = count[PackUtils.ALPHABET_SIZE-1];
sequenceBlocks = new SequenceBlock[PackUtils.numberOfPartitions(bwtSize,BWT.SEQUENCE_BLOCK_SIZE)]; sequenceBlocks = new SequenceBlock[PackUtils.numberOfPartitions(bwtSize,BWT.SEQUENCE_BLOCK_SIZE)];
for( int block = 0; block < sequenceBlocks.length; block++ ) { for( int block = 0; block < sequenceBlocks.length; block++ ) {
int sequenceStart = block* BWT.SEQUENCE_BLOCK_SIZE; int sequenceStart = block* BWT.SEQUENCE_BLOCK_SIZE;
int sequenceLength = Math.min(BWT.SEQUENCE_BLOCK_SIZE,bwtSize-sequenceStart); int sequenceLength = (int)Math.min(BWT.SEQUENCE_BLOCK_SIZE,bwtSize-sequenceStart);
int[] occurrences = new int[PackUtils.ALPHABET_SIZE]; long[] occurrences = new long[PackUtils.ALPHABET_SIZE];
byte[] bwt = new byte[sequenceLength]; byte[] bwt = new byte[sequenceLength];
intPackedInputStream.read(occurrences); uintPackedInputStream.read(occurrences);
basePackedInputStream.read(bwt); basePackedInputStream.read(bwt);
sequenceBlocks[block] = new SequenceBlock(sequenceStart,sequenceLength,new Counts(occurrences,false),bwt); sequenceBlocks[block] = new SequenceBlock(sequenceStart,sequenceLength,new Counts(occurrences,false),bwt);

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.alignment.bwa.bwt; package org.broadinstitute.sting.alignment.bwa.bwt;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.alignment.bwa.packing.IntPackedOutputStream; import org.broadinstitute.sting.alignment.bwa.packing.UnsignedIntPackedOutputStream;
import org.broadinstitute.sting.alignment.bwa.packing.BasePackedOutputStream; import org.broadinstitute.sting.alignment.bwa.packing.BasePackedOutputStream;
import java.io.*; import java.io.*;
@ -37,7 +37,7 @@ public class BWTWriter {
* @param bwt Transform to be written to the output stream. * @param bwt Transform to be written to the output stream.
*/ */
public void write( BWT bwt ) { public void write( BWT bwt ) {
IntPackedOutputStream intPackedOutputStream = new IntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN); UnsignedIntPackedOutputStream intPackedOutputStream = new UnsignedIntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN);
BasePackedOutputStream basePackedOutputStream = new BasePackedOutputStream<Integer>(Integer.class, outputStream, ByteOrder.LITTLE_ENDIAN); BasePackedOutputStream basePackedOutputStream = new BasePackedOutputStream<Integer>(Integer.class, outputStream, ByteOrder.LITTLE_ENDIAN);
try { try {

View File

@ -15,12 +15,12 @@ public class Counts implements Cloneable {
/** /**
* Internal representation of counts, broken down by ASCII value. * Internal representation of counts, broken down by ASCII value.
*/ */
private Map<Byte,Integer> counts = new HashMap<Byte,Integer>(); private Map<Byte,Long> counts = new HashMap<Byte,Long>();
/** /**
* Internal representation of cumulative counts, broken down by ASCII value. * Internal representation of cumulative counts, broken down by ASCII value.
*/ */
private Map<Byte,Integer> cumulativeCounts = new HashMap<Byte,Integer>(); private Map<Byte,Long> cumulativeCounts = new HashMap<Byte,Long>();
/** /**
* Create an empty Counts object with values A=0,C=0,G=0,T=0. * Create an empty Counts object with values A=0,C=0,G=0,T=0.
@ -28,8 +28,8 @@ public class Counts implements Cloneable {
public Counts() public Counts()
{ {
for(byte base: Bases.instance) { for(byte base: Bases.instance) {
counts.put(base,0); counts.put(base,0L);
cumulativeCounts.put(base,0); cumulativeCounts.put(base,0L);
} }
} }
@ -38,20 +38,20 @@ public class Counts implements Cloneable {
* @param data Count data, broken down by base. * @param data Count data, broken down by base.
* @param cumulative Whether the counts are cumulative, (count_G=numA+numC+numG,for example). * @param cumulative Whether the counts are cumulative, (count_G=numA+numC+numG,for example).
*/ */
public Counts( int[] data, boolean cumulative ) { public Counts( long[] data, boolean cumulative ) {
if(cumulative) { if(cumulative) {
int priorCount = 0; long priorCount = 0;
for(byte base: Bases.instance) { for(byte base: Bases.instance) {
int count = data[Bases.toPack(base)]; long count = data[Bases.toPack(base)];
counts.put(base,count-priorCount); counts.put(base,count-priorCount);
cumulativeCounts.put(base,priorCount); cumulativeCounts.put(base,priorCount);
priorCount = count; priorCount = count;
} }
} }
else { else {
int priorCount = 0; long priorCount = 0;
for(byte base: Bases.instance) { for(byte base: Bases.instance) {
int count = data[Bases.toPack(base)]; long count = data[Bases.toPack(base)];
counts.put(base,count); counts.put(base,count);
cumulativeCounts.put(base,priorCount); cumulativeCounts.put(base,priorCount);
priorCount += count; priorCount += count;
@ -64,8 +64,8 @@ public class Counts implements Cloneable {
* @param cumulative Use a cumulative representation. * @param cumulative Use a cumulative representation.
* @return Array of count values. * @return Array of count values.
*/ */
public int[] toArray(boolean cumulative) { public long[] toArray(boolean cumulative) {
int[] countArray = new int[counts.size()]; long[] countArray = new long[counts.size()];
if(cumulative) { if(cumulative) {
int index = 0; int index = 0;
boolean first = true; boolean first = true;
@ -98,8 +98,8 @@ public class Counts implements Cloneable {
catch(CloneNotSupportedException ex) { catch(CloneNotSupportedException ex) {
throw new StingException("Unable to clone counts object", ex); throw new StingException("Unable to clone counts object", ex);
} }
other.counts = new HashMap<Byte,Integer>(counts); other.counts = new HashMap<Byte,Long>(counts);
other.cumulativeCounts = new HashMap<Byte,Integer>(cumulativeCounts); other.cumulativeCounts = new HashMap<Byte,Long>(cumulativeCounts);
return other; return other;
} }
@ -123,7 +123,7 @@ public class Counts implements Cloneable {
* @param base Base for which to query counts. * @param base Base for which to query counts.
* @return Number of bases of this type seen. * @return Number of bases of this type seen.
*/ */
public int get(byte base) { public long get(byte base) {
return counts.get(base); return counts.get(base);
} }
@ -133,7 +133,7 @@ public class Counts implements Cloneable {
* @param base Base for which to query counts. * @param base Base for which to query counts.
* @return Number of bases of this type seen. * @return Number of bases of this type seen.
*/ */
public int getCumulative(byte base) { public long getCumulative(byte base) {
return cumulativeCounts.get(base); return cumulativeCounts.get(base);
} }
@ -141,7 +141,7 @@ public class Counts implements Cloneable {
* How many total bases are represented by this count structure? * How many total bases are represented by this count structure?
* @return Total bases represented. * @return Total bases represented.
*/ */
public int getTotal() { public long getTotal() {
int accumulator = 0; int accumulator = 0;
for(byte base: Bases.instance) { for(byte base: Bases.instance) {
accumulator += get(base); accumulator += get(base);

View File

@ -65,7 +65,7 @@ public class CreateBWTFromReference {
return occurrences; return occurrences;
} }
private int[] createSuffixArray( String sequence ) { private long[] createSuffixArray( String sequence ) {
TreeSet<Integer> suffixArrayBuilder = new TreeSet<Integer>( new SuffixArrayComparator(sequence) ); TreeSet<Integer> suffixArrayBuilder = new TreeSet<Integer>( new SuffixArrayComparator(sequence) );
// Build out the suffix array using a custom comparator. // Build out the suffix array using a custom comparator.
@ -77,7 +77,7 @@ public class CreateBWTFromReference {
} }
// Copy the suffix array into an int array. // Copy the suffix array into an int array.
int[] suffixArray = new int[suffixArrayBuilder.size()]; long[] suffixArray = new long[suffixArrayBuilder.size()];
int i = 0; int i = 0;
for( Integer element: suffixArrayBuilder ) for( Integer element: suffixArrayBuilder )
suffixArray[i++] = element; suffixArray[i++] = element;
@ -85,35 +85,35 @@ public class CreateBWTFromReference {
return suffixArray; return suffixArray;
} }
private int[] invertSuffixArray( int[] suffixArray ) { private long[] invertSuffixArray( long[] suffixArray ) {
int[] inverseSuffixArray = new int[suffixArray.length]; long[] inverseSuffixArray = new long[suffixArray.length];
for( int i = 0; i < suffixArray.length; i++ ) for( int i = 0; i < suffixArray.length; i++ )
inverseSuffixArray[suffixArray[i]] = i; inverseSuffixArray[(int)suffixArray[i]] = i;
return inverseSuffixArray; return inverseSuffixArray;
} }
private int[] createCompressedSuffixArray( int[] suffixArray, int[] inverseSuffixArray ) { private long[] createCompressedSuffixArray( int[] suffixArray, int[] inverseSuffixArray ) {
int[] compressedSuffixArray = new int[suffixArray.length]; long[] compressedSuffixArray = new long[suffixArray.length];
compressedSuffixArray[0] = inverseSuffixArray[0]; compressedSuffixArray[0] = inverseSuffixArray[0];
for( int i = 1; i < suffixArray.length; i++ ) for( int i = 1; i < suffixArray.length; i++ )
compressedSuffixArray[i] = inverseSuffixArray[suffixArray[i]+1]; compressedSuffixArray[i] = inverseSuffixArray[suffixArray[i]+1];
return compressedSuffixArray; return compressedSuffixArray;
} }
private int[] createInversedCompressedSuffixArray( int[] compressedSuffixArray ) { private long[] createInversedCompressedSuffixArray( int[] compressedSuffixArray ) {
int[] inverseCompressedSuffixArray = new int[compressedSuffixArray.length]; long[] inverseCompressedSuffixArray = new long[compressedSuffixArray.length];
for( int i = 0; i < compressedSuffixArray.length; i++ ) for( int i = 0; i < compressedSuffixArray.length; i++ )
inverseCompressedSuffixArray[compressedSuffixArray[i]] = i; inverseCompressedSuffixArray[compressedSuffixArray[i]] = i;
return inverseCompressedSuffixArray; return inverseCompressedSuffixArray;
} }
private byte[] createBWT( String sequence, int[] suffixArray ) { private byte[] createBWT( String sequence, long[] suffixArray ) {
byte[] bwt = new byte[suffixArray.length-1]; byte[] bwt = new byte[suffixArray.length-1];
int i = 0; int i = 0;
for( int suffixArrayEntry: suffixArray ) { for( long suffixArrayEntry: suffixArray ) {
if( suffixArrayEntry == 0 ) if( suffixArrayEntry == 0 )
continue; continue;
bwt[i++] = (byte)sequence.charAt(suffixArrayEntry-1); bwt[i++] = (byte)sequence.charAt((int)suffixArrayEntry-1);
} }
return bwt; return bwt;
} }
@ -152,12 +152,12 @@ public class CreateBWTFromReference {
occurrences.getCumulative(Bases.T)); occurrences.getCumulative(Bases.T));
// Generate the suffix array and print diagnostics. // Generate the suffix array and print diagnostics.
int[] suffixArrayData = creator.createSuffixArray(sequence); long[] suffixArrayData = creator.createSuffixArray(sequence);
int[] reverseSuffixArrayData = creator.createSuffixArray(reverseSequence); long[] reverseSuffixArrayData = creator.createSuffixArray(reverseSequence);
// Invert the suffix array and print diagnostics. // Invert the suffix array and print diagnostics.
int[] inverseSuffixArray = creator.invertSuffixArray(suffixArrayData); long[] inverseSuffixArray = creator.invertSuffixArray(suffixArrayData);
int[] reverseInverseSuffixArray = creator.invertSuffixArray(reverseSuffixArrayData); long[] reverseInverseSuffixArray = creator.invertSuffixArray(reverseSuffixArrayData);
SuffixArray suffixArray = new SuffixArray( inverseSuffixArray[0], occurrences, suffixArrayData ); SuffixArray suffixArray = new SuffixArray( inverseSuffixArray[0], occurrences, suffixArrayData );
SuffixArray reverseSuffixArray = new SuffixArray( reverseInverseSuffixArray[0], occurrences, reverseSuffixArrayData ); SuffixArray reverseSuffixArray = new SuffixArray( reverseInverseSuffixArray[0], occurrences, reverseSuffixArrayData );
@ -186,7 +186,7 @@ public class CreateBWTFromReference {
*/ */
// Create the BWT. // Create the BWT.
BWT bwt = new BWT( inverseSuffixArray[0], occurrences, creator.createBWT(sequence, suffixArray.sequence) ); BWT bwt = new BWT(inverseSuffixArray[0], occurrences, creator.createBWT(sequence, suffixArray.sequence));
BWT reverseBWT = new BWT( reverseInverseSuffixArray[0], occurrences, creator.createBWT(reverseSequence, reverseSuffixArray.sequence)); BWT reverseBWT = new BWT( reverseInverseSuffixArray[0], occurrences, creator.createBWT(reverseSequence, reverseSuffixArray.sequence));
byte[] bwtSequence = bwt.getSequence(); byte[] bwtSequence = bwt.getSequence();

View File

@ -9,13 +9,13 @@ import org.broadinstitute.sting.utils.StingException;
* @version 0.1 * @version 0.1
*/ */
public class SuffixArray { public class SuffixArray {
public final int inverseSA0; public final long inverseSA0;
public final Counts occurrences; public final Counts occurrences;
/** /**
* The elements of the sequence actually stored in memory. * The elements of the sequence actually stored in memory.
*/ */
protected final int[] sequence; protected final long[] sequence;
/** /**
* How often are individual elements in the sequence actually stored * How often are individual elements in the sequence actually stored
@ -28,7 +28,7 @@ public class SuffixArray {
*/ */
protected final BWT bwt; protected final BWT bwt;
public SuffixArray(int inverseSA0, Counts occurrences, int[] sequence) { public SuffixArray(long inverseSA0, Counts occurrences, long[] sequence) {
this(inverseSA0,occurrences,sequence,1,null); this(inverseSA0,occurrences,sequence,1,null);
} }
@ -37,8 +37,10 @@ public class SuffixArray {
* @param inverseSA0 Inverse SA entry for the first element. * @param inverseSA0 Inverse SA entry for the first element.
* @param occurrences Cumulative number of occurrences of A,C,G,T, in order. * @param occurrences Cumulative number of occurrences of A,C,G,T, in order.
* @param sequence The full suffix array. * @param sequence The full suffix array.
* @param sequenceInterval How frequently is the sequence interval stored.
* @param bwt bwt used to infer the remaining entries in the BWT.
*/ */
public SuffixArray(int inverseSA0, Counts occurrences, int[] sequence, int sequenceInterval, BWT bwt) { public SuffixArray(long inverseSA0, Counts occurrences, long[] sequence, int sequenceInterval, BWT bwt) {
this.inverseSA0 = inverseSA0; this.inverseSA0 = inverseSA0;
this.occurrences = occurrences; this.occurrences = occurrences;
this.sequence = sequence; this.sequence = sequence;
@ -53,7 +55,7 @@ public class SuffixArray {
* Retrieves the length of the sequence array. * Retrieves the length of the sequence array.
* @return Length of the suffix array. * @return Length of the suffix array.
*/ */
public int length() { public long length() {
if( bwt != null ) if( bwt != null )
return bwt.length()+1; return bwt.length()+1;
else else
@ -65,7 +67,7 @@ public class SuffixArray {
* @param index Index at which to retrieve the suffix array value. * @param index Index at which to retrieve the suffix array value.
* @return The suffix array value at that entry. * @return The suffix array value at that entry.
*/ */
public int get(int index) { public long get(long index) {
int iterations = 0; int iterations = 0;
while(index%sequenceInterval != 0) { while(index%sequenceInterval != 0) {
// The inverseSA0 ('$') doesn't have a usable ASCII representation; it must be treated as a special case. // The inverseSA0 ('$') doesn't have a usable ASCII representation; it must be treated as a special case.
@ -77,6 +79,6 @@ public class SuffixArray {
} }
iterations++; iterations++;
} }
return (sequence[index/sequenceInterval]+iterations) % length(); return (sequence[(int)(index/sequenceInterval)]+iterations) % length();
} }
} }

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.alignment.bwa.bwt; package org.broadinstitute.sting.alignment.bwa.bwt;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.alignment.bwa.packing.IntPackedInputStream; import org.broadinstitute.sting.alignment.bwa.packing.UnsignedIntPackedInputStream;
import org.broadinstitute.sting.alignment.bwa.packing.PackUtils; import org.broadinstitute.sting.alignment.bwa.packing.PackUtils;
import java.io.*; import java.io.*;
@ -44,21 +44,21 @@ public class SuffixArrayReader {
* @return The suffix array stored in the input stream. * @return The suffix array stored in the input stream.
*/ */
public SuffixArray read() { public SuffixArray read() {
IntPackedInputStream intPackedInputStream = new IntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN); UnsignedIntPackedInputStream uintPackedInputStream = new UnsignedIntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN);
int inverseSA0; long inverseSA0;
int[] occurrences; long[] occurrences;
int[] suffixArray; long[] suffixArray;
int suffixArrayInterval; int suffixArrayInterval;
try { try {
inverseSA0 = intPackedInputStream.read(); inverseSA0 = uintPackedInputStream.read();
occurrences = new int[PackUtils.ALPHABET_SIZE]; occurrences = new long[PackUtils.ALPHABET_SIZE];
intPackedInputStream.read(occurrences); uintPackedInputStream.read(occurrences);
// Throw away the suffix array size in bytes and use the occurrences table directly. // Throw away the suffix array size in bytes and use the occurrences table directly.
suffixArrayInterval = intPackedInputStream.read(); suffixArrayInterval = (int)uintPackedInputStream.read();
suffixArray = new int[(occurrences[occurrences.length-1]+suffixArrayInterval-1)/suffixArrayInterval]; suffixArray = new long[(int)((occurrences[occurrences.length-1]+suffixArrayInterval-1)/suffixArrayInterval)];
intPackedInputStream.read(suffixArray); uintPackedInputStream.read(suffixArray);
} }
catch( IOException ex ) { catch( IOException ex ) {
throw new StingException("Unable to read BWT from input stream.", ex); throw new StingException("Unable to read BWT from input stream.", ex);

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.alignment.bwa.bwt; package org.broadinstitute.sting.alignment.bwa.bwt;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.alignment.bwa.packing.IntPackedOutputStream; import org.broadinstitute.sting.alignment.bwa.packing.UnsignedIntPackedOutputStream;
import java.io.*; import java.io.*;
import java.nio.ByteOrder; import java.nio.ByteOrder;
@ -36,16 +36,16 @@ public class SuffixArrayWriter {
* @param suffixArray suffix array to write. * @param suffixArray suffix array to write.
*/ */
public void write(SuffixArray suffixArray) { public void write(SuffixArray suffixArray) {
IntPackedOutputStream intPackedOutputStream = new IntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN); UnsignedIntPackedOutputStream uintPackedOutputStream = new UnsignedIntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN);
try { try {
intPackedOutputStream.write(suffixArray.inverseSA0); uintPackedOutputStream.write(suffixArray.inverseSA0);
intPackedOutputStream.write(suffixArray.occurrences.toArray(true)); uintPackedOutputStream.write(suffixArray.occurrences.toArray(true));
// How frequently the suffix array entry is placed. // How frequently the suffix array entry is placed.
intPackedOutputStream.write(1); uintPackedOutputStream.write(1);
// Length of the suffix array. // Length of the suffix array.
intPackedOutputStream.write(suffixArray.length()-1); uintPackedOutputStream.write(suffixArray.length()-1);
intPackedOutputStream.write(suffixArray.sequence, 1, suffixArray.length()-1); uintPackedOutputStream.write(suffixArray.sequence,1,suffixArray.sequence.length);
} }
catch( IOException ex ) { catch( IOException ex ) {
throw new StingException("Unable to read BWT from input stream.", ex); throw new StingException("Unable to read BWT from input stream.", ex);

View File

@ -105,7 +105,7 @@ public class PackUtils {
* @param partitionSize Size of an individual partition. * @param partitionSize Size of an individual partition.
* @return Number of partitions that would be created. * @return Number of partitions that would be created.
*/ */
public static int numberOfPartitions( int size, int partitionSize ) { public static int numberOfPartitions( long size, long partitionSize ) {
return (size + partitionSize - 1)/partitionSize; return (int)((size+partitionSize-1) / partitionSize);
} }
} }

View File

@ -11,7 +11,7 @@ import java.nio.channels.FileChannel;
* @author mhanna * @author mhanna
* @version 0.1 * @version 0.1
*/ */
public class IntPackedInputStream { public class UnsignedIntPackedInputStream {
/** /**
* Ultimate target for the occurrence array. * Ultimate target for the occurrence array.
*/ */
@ -38,7 +38,7 @@ public class IntPackedInputStream {
* @param byteOrder Endianness to use when writing a list of integers. * @param byteOrder Endianness to use when writing a list of integers.
* @throws java.io.IOException if an I/O error occurs. * @throws java.io.IOException if an I/O error occurs.
*/ */
public IntPackedInputStream(File inputFile, ByteOrder byteOrder) throws IOException { public UnsignedIntPackedInputStream(File inputFile, ByteOrder byteOrder) throws IOException {
this(new FileInputStream(inputFile),byteOrder); this(new FileInputStream(inputFile),byteOrder);
} }
@ -47,7 +47,7 @@ public class IntPackedInputStream {
* @param inputStream Input stream from which to read ints. * @param inputStream Input stream from which to read ints.
* @param byteOrder Endianness to use when writing a list of integers. * @param byteOrder Endianness to use when writing a list of integers.
*/ */
public IntPackedInputStream(FileInputStream inputStream, ByteOrder byteOrder) { public UnsignedIntPackedInputStream(FileInputStream inputStream, ByteOrder byteOrder) {
this.targetInputStream = inputStream; this.targetInputStream = inputStream;
this.targetInputChannel = inputStream.getChannel(); this.targetInputChannel = inputStream.getChannel();
this.byteOrder = byteOrder; this.byteOrder = byteOrder;
@ -58,8 +58,8 @@ public class IntPackedInputStream {
* @return The next input datum in the stream. * @return The next input datum in the stream.
* @throws IOException if an I/O error occurs. * @throws IOException if an I/O error occurs.
*/ */
public int read() throws IOException { public long read() throws IOException {
int[] data = new int[1]; long[] data = new long[1];
read(data); read(data);
return data[0]; return data[0];
} }
@ -69,7 +69,7 @@ public class IntPackedInputStream {
* @param data placeholder for input data. * @param data placeholder for input data.
* @throws IOException if an I/O error occurs. * @throws IOException if an I/O error occurs.
*/ */
public void read( int[] data ) throws IOException { public void read( long[] data ) throws IOException {
read( data, 0, data.length ); read( data, 0, data.length );
} }
@ -80,7 +80,7 @@ public class IntPackedInputStream {
* @param length number of ints to read in. * @param length number of ints to read in.
* @throws IOException if an I/O error occurs. * @throws IOException if an I/O error occurs.
*/ */
public void read( int[] data, int offset, int length ) throws IOException { public void read( long[] data, int offset, int length ) throws IOException {
ByteBuffer readBuffer = ByteBuffer.allocate(bytesPerInteger*length).order(byteOrder); ByteBuffer readBuffer = ByteBuffer.allocate(bytesPerInteger*length).order(byteOrder);
targetInputChannel.read(readBuffer,targetInputChannel.position()); targetInputChannel.read(readBuffer,targetInputChannel.position());

View File

@ -35,7 +35,7 @@ import java.nio.ByteOrder;
* @author mhanna * @author mhanna
* @version 0.1 * @version 0.1
*/ */
public class IntPackedOutputStream { public class UnsignedIntPackedOutputStream {
/** /**
* Ultimate target for the occurrence array. * Ultimate target for the occurrence array.
*/ */
@ -52,7 +52,7 @@ public class IntPackedOutputStream {
* @param byteOrder Endianness to use when writing a list of integers. * @param byteOrder Endianness to use when writing a list of integers.
* @throws IOException if an I/O error occurs. * @throws IOException if an I/O error occurs.
*/ */
public IntPackedOutputStream(File outputFile, ByteOrder byteOrder) throws IOException { public UnsignedIntPackedOutputStream(File outputFile, ByteOrder byteOrder) throws IOException {
this(new FileOutputStream(outputFile),byteOrder); this(new FileOutputStream(outputFile),byteOrder);
} }
@ -60,9 +60,8 @@ public class IntPackedOutputStream {
* Write packed ints to the given OutputStream. * Write packed ints to the given OutputStream.
* @param outputStream Output stream to which to write packed ints. * @param outputStream Output stream to which to write packed ints.
* @param byteOrder Endianness to use when writing a list of integers. * @param byteOrder Endianness to use when writing a list of integers.
* @throws IOException if an I/O error occurs.
*/ */
public IntPackedOutputStream(OutputStream outputStream, ByteOrder byteOrder) { public UnsignedIntPackedOutputStream(OutputStream outputStream, ByteOrder byteOrder) {
this.targetOutputStream = outputStream; this.targetOutputStream = outputStream;
buffer = ByteBuffer.allocate(PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE).order(byteOrder); buffer = ByteBuffer.allocate(PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE).order(byteOrder);
} }
@ -72,9 +71,9 @@ public class IntPackedOutputStream {
* @param datum datum to write. * @param datum datum to write.
* @throws IOException if an I/O error occurs. * @throws IOException if an I/O error occurs.
*/ */
public void write( int datum ) throws IOException { public void write( long datum ) throws IOException {
buffer.rewind(); buffer.rewind();
buffer.putInt(datum); buffer.putInt((int)datum);
targetOutputStream.write(buffer.array()); targetOutputStream.write(buffer.array());
} }
@ -83,8 +82,8 @@ public class IntPackedOutputStream {
* @param data data to write. occurrences.length must match alphabet size. * @param data data to write. occurrences.length must match alphabet size.
* @throws IOException if an I/O error occurs. * @throws IOException if an I/O error occurs.
*/ */
public void write( int[] data ) throws IOException { public void write( long[] data ) throws IOException {
for(int datum: data) for(long datum: data)
write(datum); write(datum);
} }
@ -95,7 +94,7 @@ public class IntPackedOutputStream {
* @param length number of ints to write. * @param length number of ints to write.
* @throws IOException if an I/O error occurs. * @throws IOException if an I/O error occurs.
*/ */
public void write( int[] data, int offset, int length ) throws IOException { public void write( long[] data, int offset, int length ) throws IOException {
for( int i = offset; i < offset+length; i++ ) for( int i = offset; i < offset+length; i++ )
write(data[i]); write(data[i]);
} }