diff --git a/java/src/org/broadinstitute/sting/alignment/Alignment.java b/java/src/org/broadinstitute/sting/alignment/Alignment.java index 9ca765c5c..8fd73e656 100644 --- a/java/src/org/broadinstitute/sting/alignment/Alignment.java +++ b/java/src/org/broadinstitute/sting/alignment/Alignment.java @@ -17,19 +17,11 @@ public interface Alignment extends Comparable { * Gets the starting position for the given alignment. * @return Starting position. */ - public int getAlignmentStart(); + public long getAlignmentStart(); /** * Gets the score of this alignment. * @return The score. */ public int getScore(); - - /** - * Temporary getters. - * @return - */ - public int getMismatches(); - public int getGapOpens(); - public int getGapExtensions(); } diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java index 4d6d502c9..fa3e7acd4 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java @@ -124,7 +124,7 @@ public class BWAAligner implements Aligner { // Found a valid alignment; store it and move on. if(alignment.position >= read.getReadLength()-1) { - for( int bwtIndex = alignment.loBound; bwtIndex <= alignment.hiBound; bwtIndex++ ) { + for(long bwtIndex = alignment.loBound; bwtIndex <= alignment.hiBound; bwtIndex++) { BWAAlignment finalAlignment = alignment.clone(); if( finalAlignment.isNegativeStrand() ) diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAlignment.java b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAlignment.java index fc68773c6..10b558d60 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAlignment.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAlignment.java @@ -30,7 +30,7 @@ public class BWAAlignment implements Alignment, Cloneable { /** * Start of the final alignment. 
*/ - protected int alignmentStart; + protected long alignmentStart; /** * Is this match being treated as a negative or positive strand? @@ -65,12 +65,12 @@ public class BWAAlignment implements Alignment, Cloneable { /** * Working variable. The lower bound of the alignment within the BWT. */ - protected int loBound; + protected long loBound; /** * Working variable. The upper bound of the alignment within the BWT. */ - protected int hiBound; + protected long hiBound; /** * Cache the score. @@ -81,7 +81,7 @@ public class BWAAlignment implements Alignment, Cloneable { * Gets the starting position for the given alignment. * @return Starting position. */ - public int getAlignmentStart() { + public long getAlignmentStart() { return alignmentStart; } diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/LowerBound.java b/java/src/org/broadinstitute/sting/alignment/bwa/LowerBound.java index 31ca7f5bc..7e4510128 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/LowerBound.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/LowerBound.java @@ -3,7 +3,6 @@ package org.broadinstitute.sting.alignment.bwa; import java.util.List; import java.util.ArrayList; -import org.broadinstitute.sting.alignment.bwa.bwt.Bases; import org.broadinstitute.sting.alignment.bwa.bwt.BWT; /** @@ -17,17 +16,17 @@ public class LowerBound { /** * Lower bound of the suffix array. */ - public final int loIndex; + public final long loIndex; /** * Upper bound of the suffix array. */ - public final int hiIndex; + public final long hiIndex; /** * Width of the bwt from loIndex -> hiIndex, inclusive. */ - public final int width; + public final long width; /** * The lower bound at the given point. @@ -36,9 +35,11 @@ public class LowerBound { /** * Create a new lower bound with the given value. + * @param loIndex The lower bound of the BWT. + * @param hiIndex The upper bound of the BWT. * @param value Value for the lower bound at this site. 
*/ - private LowerBound(int loIndex, int hiIndex, int value) { + private LowerBound(long loIndex, long hiIndex, int value) { this.loIndex = loIndex; this.hiIndex = hiIndex; this.width = hiIndex - loIndex + 1; @@ -47,11 +48,16 @@ public class LowerBound { /** * Create a non-optimal bound according to the algorithm specified in Figure 3 of the BWA paper. + * @param bases Bases of the read to use when creating a new BWT. + * @param bwt BWT to check against. + * @return A list of lower bounds at every point in the reference. + * */ - public static List create( Byte[] bases, BWT bwt ) { + public static List create(Byte[] bases, BWT bwt) { List bounds = new ArrayList(); - int loIndex = 0, hiIndex = bwt.length(), mismatches = 0; + long loIndex = 0, hiIndex = bwt.length(); + int mismatches = 0; for( int i = bases.length-1; i >= 0; i-- ) { Byte base = bases[i]; diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/bwt/BWT.java b/java/src/org/broadinstitute/sting/alignment/bwa/bwt/BWT.java index 6a4103068..db708797f 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/bwt/BWT.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/bwt/BWT.java @@ -19,7 +19,7 @@ public class BWT { /** * The inverse SA, used as a placeholder for determining where the special EOL character sits. */ - protected final int inverseSA0; + protected final long inverseSA0; /** * Cumulative counts for the entire BWT. @@ -37,7 +37,7 @@ public class BWT { * @param counts Cumulative count of bases, in A,C,G,T order. * @param sequenceBlocks The full BWT sequence, sans the '$'. */ - public BWT( int inverseSA0, Counts counts, SequenceBlock[] sequenceBlocks ) { + public BWT( long inverseSA0, Counts counts, SequenceBlock[] sequenceBlocks ) { this.inverseSA0 = inverseSA0; this.counts = counts; this.sequenceBlocks = sequenceBlocks; @@ -49,7 +49,7 @@ public class BWT { * @param counts Count of bases, in A,C,G,T order. * @param sequence The full BWT sequence, sans the '$'. 
*/ - public BWT( int inverseSA0, Counts counts, byte[] sequence ) { + public BWT( long inverseSA0, Counts counts, byte[] sequence ) { this(inverseSA0,counts,generateSequenceBlocks(sequence)); } @@ -58,7 +58,7 @@ public class BWT { * @return The full BWT string as a byte array. */ public byte[] getSequence() { - byte[] sequence = new byte[counts.getTotal()]; + byte[] sequence = new byte[(int)counts.getTotal()]; for( SequenceBlock block: sequenceBlocks ) System.arraycopy(block.sequence,0,sequence,block.sequenceStart,block.sequenceLength); return sequence; @@ -69,7 +69,7 @@ public class BWT { * @param base The base. * @return Total counts for all bases lexicographically smaller than this base. */ - public int counts(byte base) { + public long counts(byte base) { return counts.getCumulative(base); } @@ -79,10 +79,10 @@ public class BWT { * @param index The position to search within the BWT. * @return Total counts for all bases lexicographically smaller than this base. */ - public int occurrences(byte base,int index) { + public long occurrences(byte base,long index) { SequenceBlock block = getSequenceBlock(index); int position = getSequencePosition(index); - int accumulator = block.occurrences.get(base); + long accumulator = block.occurrences.get(base); for(int i = 0; i <= position; i++) { if(base == block.sequence[i]) accumulator++; @@ -94,7 +94,7 @@ public class BWT { * The number of bases in the BWT as a whole. * @return Number of bases. */ - public int length() { + public long length() { return counts.getTotal(); } @@ -103,7 +103,7 @@ public class BWT { * @param index The index to use. * @return The base at that location. 
*/ - protected byte getBase(int index) { + protected byte getBase(long index) { if(index == inverseSA0) throw new StingException(String.format("Base at index %d does not have a text representation",index)); @@ -112,16 +112,16 @@ public class BWT { return block.sequence[position]; } - private SequenceBlock getSequenceBlock(int index) { + private SequenceBlock getSequenceBlock(long index) { // If the index is above the SA-1[0], remap it to the appropriate coordinate space. if(index > inverseSA0) index--; - return sequenceBlocks[index/SEQUENCE_BLOCK_SIZE]; + return sequenceBlocks[(int)(index/SEQUENCE_BLOCK_SIZE)]; } - private int getSequencePosition(int index) { + private int getSequencePosition(long index) { // If the index is above the SA-1[0], remap it to the appropriate coordinate space. if(index > inverseSA0) index--; - return index%SEQUENCE_BLOCK_SIZE; + return (int)(index%SEQUENCE_BLOCK_SIZE); } /** diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/bwt/BWTReader.java b/java/src/org/broadinstitute/sting/alignment/bwa/bwt/BWTReader.java index cf5b2b4a8..9b0d70840 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/bwt/BWTReader.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/bwt/BWTReader.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.alignment.bwa.bwt; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.alignment.bwa.packing.IntPackedInputStream; +import org.broadinstitute.sting.alignment.bwa.packing.UnsignedIntPackedInputStream; import org.broadinstitute.sting.alignment.bwa.packing.BasePackedInputStream; import org.broadinstitute.sting.alignment.bwa.packing.PackUtils; @@ -37,29 +37,29 @@ public class BWTReader { * @return The BWT stored in the input stream. 
*/ public BWT read() { - IntPackedInputStream intPackedInputStream = new IntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN); + UnsignedIntPackedInputStream uintPackedInputStream = new UnsignedIntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN); BasePackedInputStream basePackedInputStream = new BasePackedInputStream(Integer.class, inputStream, ByteOrder.LITTLE_ENDIAN); - int inverseSA0; - int[] count; + long inverseSA0; + long[] count; SequenceBlock[] sequenceBlocks; try { - inverseSA0 = intPackedInputStream.read(); - count = new int[PackUtils.ALPHABET_SIZE]; - intPackedInputStream.read(count); + inverseSA0 = uintPackedInputStream.read(); + count = new long[PackUtils.ALPHABET_SIZE]; + uintPackedInputStream.read(count); - int bwtSize = count[PackUtils.ALPHABET_SIZE-1]; + long bwtSize = count[PackUtils.ALPHABET_SIZE-1]; sequenceBlocks = new SequenceBlock[PackUtils.numberOfPartitions(bwtSize,BWT.SEQUENCE_BLOCK_SIZE)]; for( int block = 0; block < sequenceBlocks.length; block++ ) { int sequenceStart = block* BWT.SEQUENCE_BLOCK_SIZE; - int sequenceLength = Math.min(BWT.SEQUENCE_BLOCK_SIZE,bwtSize-sequenceStart); + int sequenceLength = (int)Math.min(BWT.SEQUENCE_BLOCK_SIZE,bwtSize-sequenceStart); - int[] occurrences = new int[PackUtils.ALPHABET_SIZE]; + long[] occurrences = new long[PackUtils.ALPHABET_SIZE]; byte[] bwt = new byte[sequenceLength]; - intPackedInputStream.read(occurrences); + uintPackedInputStream.read(occurrences); basePackedInputStream.read(bwt); sequenceBlocks[block] = new SequenceBlock(sequenceStart,sequenceLength,new Counts(occurrences,false),bwt); diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/bwt/BWTWriter.java b/java/src/org/broadinstitute/sting/alignment/bwa/bwt/BWTWriter.java index a8f810aa7..32b94acf2 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/bwt/BWTWriter.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/bwt/BWTWriter.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.alignment.bwa.bwt; 
import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.alignment.bwa.packing.IntPackedOutputStream; +import org.broadinstitute.sting.alignment.bwa.packing.UnsignedIntPackedOutputStream; import org.broadinstitute.sting.alignment.bwa.packing.BasePackedOutputStream; import java.io.*; @@ -37,7 +37,7 @@ public class BWTWriter { * @param bwt Transform to be written to the output stream. */ public void write( BWT bwt ) { - IntPackedOutputStream intPackedOutputStream = new IntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN); + UnsignedIntPackedOutputStream intPackedOutputStream = new UnsignedIntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN); BasePackedOutputStream basePackedOutputStream = new BasePackedOutputStream(Integer.class, outputStream, ByteOrder.LITTLE_ENDIAN); try { diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/bwt/Counts.java b/java/src/org/broadinstitute/sting/alignment/bwa/bwt/Counts.java index 5a70848ca..e2d0d92b9 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/bwt/Counts.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/bwt/Counts.java @@ -15,12 +15,12 @@ public class Counts implements Cloneable { /** * Internal representation of counts, broken down by ASCII value. */ - private Map counts = new HashMap(); + private Map counts = new HashMap(); /** * Internal representation of cumulative counts, broken down by ASCII value. */ - private Map cumulativeCounts = new HashMap(); + private Map cumulativeCounts = new HashMap(); /** * Create an empty Counts object with values A=0,C=0,G=0,T=0. @@ -28,8 +28,8 @@ public class Counts implements Cloneable { public Counts() { for(byte base: Bases.instance) { - counts.put(base,0); - cumulativeCounts.put(base,0); + counts.put(base,0L); + cumulativeCounts.put(base,0L); } } @@ -38,20 +38,20 @@ public class Counts implements Cloneable { * @param data Count data, broken down by base. 
* @param cumulative Whether the counts are cumulative, (count_G=numA+numC+numG,for example). */ - public Counts( int[] data, boolean cumulative ) { + public Counts( long[] data, boolean cumulative ) { if(cumulative) { - int priorCount = 0; + long priorCount = 0; for(byte base: Bases.instance) { - int count = data[Bases.toPack(base)]; + long count = data[Bases.toPack(base)]; counts.put(base,count-priorCount); cumulativeCounts.put(base,priorCount); priorCount = count; } } else { - int priorCount = 0; + long priorCount = 0; for(byte base: Bases.instance) { - int count = data[Bases.toPack(base)]; + long count = data[Bases.toPack(base)]; counts.put(base,count); cumulativeCounts.put(base,priorCount); priorCount += count; @@ -64,8 +64,8 @@ public class Counts implements Cloneable { * @param cumulative Use a cumulative representation. * @return Array of count values. */ - public int[] toArray(boolean cumulative) { - int[] countArray = new int[counts.size()]; + public long[] toArray(boolean cumulative) { + long[] countArray = new long[counts.size()]; if(cumulative) { int index = 0; boolean first = true; @@ -98,8 +98,8 @@ public class Counts implements Cloneable { catch(CloneNotSupportedException ex) { throw new StingException("Unable to clone counts object", ex); } - other.counts = new HashMap(counts); - other.cumulativeCounts = new HashMap(cumulativeCounts); + other.counts = new HashMap(counts); + other.cumulativeCounts = new HashMap(cumulativeCounts); return other; } @@ -123,7 +123,7 @@ public class Counts implements Cloneable { * @param base Base for which to query counts. * @return Number of bases of this type seen. */ - public int get(byte base) { + public long get(byte base) { return counts.get(base); } @@ -133,7 +133,7 @@ public class Counts implements Cloneable { * @param base Base for which to query counts. * @return Number of bases of this type seen. 
*/ - public int getCumulative(byte base) { + public long getCumulative(byte base) { return cumulativeCounts.get(base); } @@ -141,7 +141,7 @@ public class Counts implements Cloneable { * How many total bases are represented by this count structure? * @return Total bases represented. */ - public int getTotal() { - int accumulator = 0; + public long getTotal() { + long accumulator = 0; /* must be long: '+=' into an int would silently narrow the long per-base counts */ for(byte base: Bases.instance) { accumulator += get(base); diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/bwt/CreateBWTFromReference.java b/java/src/org/broadinstitute/sting/alignment/bwa/bwt/CreateBWTFromReference.java index db3aa38b2..dfebe0258 100755 --- a/java/src/org/broadinstitute/sting/alignment/bwa/bwt/CreateBWTFromReference.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/bwt/CreateBWTFromReference.java @@ -65,7 +65,7 @@ public class CreateBWTFromReference { return occurrences; } - private int[] createSuffixArray( String sequence ) { + private long[] createSuffixArray( String sequence ) { TreeSet suffixArrayBuilder = new TreeSet( new SuffixArrayComparator(sequence) ); // Build out the suffix array using a custom comparator. @@ -77,7 +77,7 @@ public class CreateBWTFromReference { } // Copy the suffix array into an int array. 
- int[] suffixArray = new int[suffixArrayBuilder.size()]; + long[] suffixArray = new long[suffixArrayBuilder.size()]; int i = 0; for( Integer element: suffixArrayBuilder ) suffixArray[i++] = element; @@ -85,35 +85,35 @@ public class CreateBWTFromReference { return suffixArray; } - private int[] invertSuffixArray( int[] suffixArray ) { - int[] inverseSuffixArray = new int[suffixArray.length]; + private long[] invertSuffixArray( long[] suffixArray ) { + long[] inverseSuffixArray = new long[suffixArray.length]; for( int i = 0; i < suffixArray.length; i++ ) - inverseSuffixArray[suffixArray[i]] = i; + inverseSuffixArray[(int)suffixArray[i]] = i; return inverseSuffixArray; } - private int[] createCompressedSuffixArray( int[] suffixArray, int[] inverseSuffixArray ) { - int[] compressedSuffixArray = new int[suffixArray.length]; + private long[] createCompressedSuffixArray( int[] suffixArray, int[] inverseSuffixArray ) { + long[] compressedSuffixArray = new long[suffixArray.length]; compressedSuffixArray[0] = inverseSuffixArray[0]; for( int i = 1; i < suffixArray.length; i++ ) compressedSuffixArray[i] = inverseSuffixArray[suffixArray[i]+1]; return compressedSuffixArray; } - private int[] createInversedCompressedSuffixArray( int[] compressedSuffixArray ) { - int[] inverseCompressedSuffixArray = new int[compressedSuffixArray.length]; + private long[] createInversedCompressedSuffixArray( int[] compressedSuffixArray ) { + long[] inverseCompressedSuffixArray = new long[compressedSuffixArray.length]; for( int i = 0; i < compressedSuffixArray.length; i++ ) inverseCompressedSuffixArray[compressedSuffixArray[i]] = i; return inverseCompressedSuffixArray; } - private byte[] createBWT( String sequence, int[] suffixArray ) { + private byte[] createBWT( String sequence, long[] suffixArray ) { byte[] bwt = new byte[suffixArray.length-1]; int i = 0; - for( int suffixArrayEntry: suffixArray ) { + for( long suffixArrayEntry: suffixArray ) { if( suffixArrayEntry == 0 ) continue; - bwt[i++] = 
(byte)sequence.charAt(suffixArrayEntry-1); + bwt[i++] = (byte)sequence.charAt((int)suffixArrayEntry-1); } return bwt; } @@ -152,12 +152,12 @@ public class CreateBWTFromReference { occurrences.getCumulative(Bases.T)); // Generate the suffix array and print diagnostics. - int[] suffixArrayData = creator.createSuffixArray(sequence); - int[] reverseSuffixArrayData = creator.createSuffixArray(reverseSequence); + long[] suffixArrayData = creator.createSuffixArray(sequence); + long[] reverseSuffixArrayData = creator.createSuffixArray(reverseSequence); // Invert the suffix array and print diagnostics. - int[] inverseSuffixArray = creator.invertSuffixArray(suffixArrayData); - int[] reverseInverseSuffixArray = creator.invertSuffixArray(reverseSuffixArrayData); + long[] inverseSuffixArray = creator.invertSuffixArray(suffixArrayData); + long[] reverseInverseSuffixArray = creator.invertSuffixArray(reverseSuffixArrayData); SuffixArray suffixArray = new SuffixArray( inverseSuffixArray[0], occurrences, suffixArrayData ); SuffixArray reverseSuffixArray = new SuffixArray( reverseInverseSuffixArray[0], occurrences, reverseSuffixArrayData ); @@ -186,7 +186,7 @@ public class CreateBWTFromReference { */ // Create the BWT. 
- BWT bwt = new BWT( inverseSuffixArray[0], occurrences, creator.createBWT(sequence, suffixArray.sequence) ); + BWT bwt = new BWT(inverseSuffixArray[0], occurrences, creator.createBWT(sequence, suffixArray.sequence)); BWT reverseBWT = new BWT( reverseInverseSuffixArray[0], occurrences, creator.createBWT(reverseSequence, reverseSuffixArray.sequence)); byte[] bwtSequence = bwt.getSequence(); diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/bwt/SuffixArray.java b/java/src/org/broadinstitute/sting/alignment/bwa/bwt/SuffixArray.java index a0db8e920..65696fa92 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/bwt/SuffixArray.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/bwt/SuffixArray.java @@ -9,13 +9,13 @@ import org.broadinstitute.sting.utils.StingException; * @version 0.1 */ public class SuffixArray { - public final int inverseSA0; + public final long inverseSA0; public final Counts occurrences; /** * The elements of the sequence actually stored in memory. */ - protected final int[] sequence; + protected final long[] sequence; /** * How often are individual elements in the sequence actually stored @@ -28,7 +28,7 @@ public class SuffixArray { */ protected final BWT bwt; - public SuffixArray(int inverseSA0, Counts occurrences, int[] sequence) { + public SuffixArray(long inverseSA0, Counts occurrences, long[] sequence) { this(inverseSA0,occurrences,sequence,1,null); } @@ -37,8 +37,10 @@ public class SuffixArray { * @param inverseSA0 Inverse SA entry for the first element. * @param occurrences Cumulative number of occurrences of A,C,G,T, in order. * @param sequence The full suffix array. + * @param sequenceInterval How frequently is the sequence interval stored. + * @param bwt bwt used to infer the remaining entries in the BWT. 
*/ - public SuffixArray(int inverseSA0, Counts occurrences, int[] sequence, int sequenceInterval, BWT bwt) { + public SuffixArray(long inverseSA0, Counts occurrences, long[] sequence, int sequenceInterval, BWT bwt) { this.inverseSA0 = inverseSA0; this.occurrences = occurrences; this.sequence = sequence; @@ -53,7 +55,7 @@ public class SuffixArray { * Retrieves the length of the sequence array. * @return Length of the suffix array. */ - public int length() { + public long length() { if( bwt != null ) return bwt.length()+1; else @@ -65,7 +67,7 @@ public class SuffixArray { * @param index Index at which to retrieve the suffix array vaule. * @return The suffix array value at that entry. */ - public int get(int index) { + public long get(long index) { int iterations = 0; while(index%sequenceInterval != 0) { // The inverseSA0 ('$') doesn't have a usable ASCII representation; it must be treated as a special case. @@ -77,6 +79,6 @@ public class SuffixArray { } iterations++; } - return (sequence[index/sequenceInterval]+iterations) % length(); + return (sequence[(int)(index/sequenceInterval)]+iterations) % length(); } } diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/bwt/SuffixArrayReader.java b/java/src/org/broadinstitute/sting/alignment/bwa/bwt/SuffixArrayReader.java index 3f6b47cce..d7d92dfa5 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/bwt/SuffixArrayReader.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/bwt/SuffixArrayReader.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.alignment.bwa.bwt; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.alignment.bwa.packing.IntPackedInputStream; +import org.broadinstitute.sting.alignment.bwa.packing.UnsignedIntPackedInputStream; import org.broadinstitute.sting.alignment.bwa.packing.PackUtils; import java.io.*; @@ -44,21 +44,21 @@ public class SuffixArrayReader { * @return The suffix array stored in the input stream. 
*/ public SuffixArray read() { - IntPackedInputStream intPackedInputStream = new IntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN); + UnsignedIntPackedInputStream uintPackedInputStream = new UnsignedIntPackedInputStream(inputStream, ByteOrder.LITTLE_ENDIAN); - int inverseSA0; - int[] occurrences; - int[] suffixArray; + long inverseSA0; + long[] occurrences; + long[] suffixArray; int suffixArrayInterval; try { - inverseSA0 = intPackedInputStream.read(); - occurrences = new int[PackUtils.ALPHABET_SIZE]; - intPackedInputStream.read(occurrences); + inverseSA0 = uintPackedInputStream.read(); + occurrences = new long[PackUtils.ALPHABET_SIZE]; + uintPackedInputStream.read(occurrences); // Throw away the suffix array size in bytes and use the occurrences table directly. - suffixArrayInterval = intPackedInputStream.read(); - suffixArray = new int[(occurrences[occurrences.length-1]+suffixArrayInterval-1)/suffixArrayInterval]; - intPackedInputStream.read(suffixArray); + suffixArrayInterval = (int)uintPackedInputStream.read(); + suffixArray = new long[(int)((occurrences[occurrences.length-1]+suffixArrayInterval-1)/suffixArrayInterval)]; + uintPackedInputStream.read(suffixArray); } catch( IOException ex ) { throw new StingException("Unable to read BWT from input stream.", ex); diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/bwt/SuffixArrayWriter.java b/java/src/org/broadinstitute/sting/alignment/bwa/bwt/SuffixArrayWriter.java index c0dabd04b..d05f576b7 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/bwt/SuffixArrayWriter.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/bwt/SuffixArrayWriter.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.alignment.bwa.bwt; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.alignment.bwa.packing.IntPackedOutputStream; +import org.broadinstitute.sting.alignment.bwa.packing.UnsignedIntPackedOutputStream; import java.io.*; import java.nio.ByteOrder; @@ -36,16 
+36,16 @@ public class SuffixArrayWriter { * @param suffixArray suffix array to write. */ public void write(SuffixArray suffixArray) { - IntPackedOutputStream intPackedOutputStream = new IntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN); + UnsignedIntPackedOutputStream uintPackedOutputStream = new UnsignedIntPackedOutputStream(outputStream, ByteOrder.LITTLE_ENDIAN); try { - intPackedOutputStream.write(suffixArray.inverseSA0); - intPackedOutputStream.write(suffixArray.occurrences.toArray(true)); + uintPackedOutputStream.write(suffixArray.inverseSA0); + uintPackedOutputStream.write(suffixArray.occurrences.toArray(true)); // How frequently the suffix array entry is placed. - intPackedOutputStream.write(1); + uintPackedOutputStream.write(1); // Length of the suffix array. - intPackedOutputStream.write(suffixArray.length()-1); - intPackedOutputStream.write(suffixArray.sequence, 1, suffixArray.length()-1); + uintPackedOutputStream.write(suffixArray.length()-1); + uintPackedOutputStream.write(suffixArray.sequence,1,suffixArray.sequence.length-1); /* third argument is an element count; starting at offset 1 leaves only length-1 entries */ } catch( IOException ex ) { throw new StingException("Unable to read BWT from input stream.", ex); diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/packing/PackUtils.java b/java/src/org/broadinstitute/sting/alignment/bwa/packing/PackUtils.java index 6fd175652..297bd3012 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/packing/PackUtils.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/packing/PackUtils.java @@ -105,7 +105,7 @@ public class PackUtils { * @param partitionSize Size of an individual partition. * @return Number of partitions that would be created. 
*/ - public static int numberOfPartitions( int size, int partitionSize ) { - return (size + partitionSize - 1)/partitionSize; + public static int numberOfPartitions( long size, long partitionSize ) { + return (int)((size+partitionSize-1) / partitionSize); } } diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/packing/IntPackedInputStream.java b/java/src/org/broadinstitute/sting/alignment/bwa/packing/UnsignedIntPackedInputStream.java similarity index 85% rename from java/src/org/broadinstitute/sting/alignment/bwa/packing/IntPackedInputStream.java rename to java/src/org/broadinstitute/sting/alignment/bwa/packing/UnsignedIntPackedInputStream.java index 27aba4460..f9861a717 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/packing/IntPackedInputStream.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/packing/UnsignedIntPackedInputStream.java @@ -11,7 +11,7 @@ import java.nio.channels.FileChannel; * @author mhanna * @version 0.1 */ -public class IntPackedInputStream { +public class UnsignedIntPackedInputStream { /** * Ultimate target for the occurrence array. */ @@ -38,7 +38,7 @@ public class IntPackedInputStream { * @param byteOrder Endianness to use when writing a list of integers. * @throws java.io.IOException if an I/O error occurs. */ - public IntPackedInputStream(File inputFile, ByteOrder byteOrder) throws IOException { + public UnsignedIntPackedInputStream(File inputFile, ByteOrder byteOrder) throws IOException { this(new FileInputStream(inputFile),byteOrder); } @@ -47,7 +47,7 @@ public class IntPackedInputStream { * @param inputStream Input stream from which to read ints. * @param byteOrder Endianness to use when writing a list of integers. 
*/ - public IntPackedInputStream(FileInputStream inputStream, ByteOrder byteOrder) { + public UnsignedIntPackedInputStream(FileInputStream inputStream, ByteOrder byteOrder) { this.targetInputStream = inputStream; this.targetInputChannel = inputStream.getChannel(); this.byteOrder = byteOrder; @@ -58,8 +58,8 @@ public class IntPackedInputStream { * @return The next input datum in the stream. * @throws IOException if an I/O error occurs. */ - public int read() throws IOException { - int[] data = new int[1]; + public long read() throws IOException { + long[] data = new long[1]; read(data); return data[0]; } @@ -69,7 +69,7 @@ public class IntPackedInputStream { * @param data placeholder for input data. * @throws IOException if an I/O error occurs. */ - public void read( int[] data ) throws IOException { + public void read( long[] data ) throws IOException { read( data, 0, data.length ); } @@ -80,7 +80,7 @@ public class IntPackedInputStream { * @param length number of ints to read in. * @throws IOException if an I/O error occurs. 
*/ - public void read( int[] data, int offset, int length ) throws IOException { + public void read( long[] data, int offset, int length ) throws IOException { ByteBuffer readBuffer = ByteBuffer.allocate(bytesPerInteger*length).order(byteOrder); targetInputChannel.read(readBuffer,targetInputChannel.position()); diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/packing/IntPackedOutputStream.java b/java/src/org/broadinstitute/sting/alignment/bwa/packing/UnsignedIntPackedOutputStream.java similarity index 87% rename from java/src/org/broadinstitute/sting/alignment/bwa/packing/IntPackedOutputStream.java rename to java/src/org/broadinstitute/sting/alignment/bwa/packing/UnsignedIntPackedOutputStream.java index 3933f6b5c..700fe141f 100755 --- a/java/src/org/broadinstitute/sting/alignment/bwa/packing/IntPackedOutputStream.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/packing/UnsignedIntPackedOutputStream.java @@ -35,7 +35,7 @@ import java.nio.ByteOrder; * @author mhanna * @version 0.1 */ -public class IntPackedOutputStream { +public class UnsignedIntPackedOutputStream { /** * Ultimate target for the occurrence array. */ @@ -52,7 +52,7 @@ public class IntPackedOutputStream { * @param byteOrder Endianness to use when writing a list of integers. * @throws IOException if an I/O error occurs. */ - public IntPackedOutputStream(File outputFile, ByteOrder byteOrder) throws IOException { + public UnsignedIntPackedOutputStream(File outputFile, ByteOrder byteOrder) throws IOException { this(new FileOutputStream(outputFile),byteOrder); } @@ -60,9 +60,8 @@ public class IntPackedOutputStream { * Write packed ints to the given OutputStream. * @param outputStream Output stream to which to write packed ints. * @param byteOrder Endianness to use when writing a list of integers. - * @throws IOException if an I/O error occurs. 
*/ - public IntPackedOutputStream(OutputStream outputStream, ByteOrder byteOrder) { + public UnsignedIntPackedOutputStream(OutputStream outputStream, ByteOrder byteOrder) { this.targetOutputStream = outputStream; buffer = ByteBuffer.allocate(PackUtils.bitsInType(Integer.class)/PackUtils.BITS_PER_BYTE).order(byteOrder); } @@ -72,9 +71,9 @@ public class IntPackedOutputStream { * @param datum datum to write. * @throws IOException if an I/O error occurs. */ - public void write( int datum ) throws IOException { + public void write( long datum ) throws IOException { buffer.rewind(); - buffer.putInt(datum); + buffer.putInt((int)datum); targetOutputStream.write(buffer.array()); } @@ -83,8 +82,8 @@ public class IntPackedOutputStream { * @param data data to write. occurrences.length must match alphabet size. * @throws IOException if an I/O error occurs. */ - public void write( int[] data ) throws IOException { - for(int datum: data) + public void write( long[] data ) throws IOException { + for(long datum: data) write(datum); } @@ -95,7 +94,7 @@ public class IntPackedOutputStream { * @param length number of ints to write. * @throws IOException if an I/O error occurs. */ - public void write( int[] data, int offset, int length ) throws IOException { + public void write( long[] data, int offset, int length ) throws IOException { for( int i = offset; i < offset+length; i++ ) write(data[i]); }