diff --git a/java/src/org/broadinstitute/sting/utils/fasta/IndexedFastaSequenceFile.java b/java/src/org/broadinstitute/sting/utils/fasta/IndexedFastaSequenceFile.java index a3f0a0444..2a3999032 100755 --- a/java/src/org/broadinstitute/sting/utils/fasta/IndexedFastaSequenceFile.java +++ b/java/src/org/broadinstitute/sting/utils/fasta/IndexedFastaSequenceFile.java @@ -118,21 +118,40 @@ public class IndexedFastaSequenceFile implements ReferenceSequenceFile { } public ReferenceSequence getSequence( String contig ) { - return getSubsequenceAt( contig, 0, (int)index.getIndexEntry(contig).getSize() ); + return getSubsequenceAt( contig, 0, index.getIndexEntry(contig).getSize()-1 ); } - public ReferenceSequence getSubsequenceAt( String contig, int pos, int length ) { + /** + * Retrieves the bases of a contig lying between two zero-based, inclusive offsets. + * @param contig Name of the contig to read from. + * @param start Zero-based offset of the first base to retrieve. + * @param stop Zero-based offset of the last base to retrieve, inclusive. + * @return A ReferenceSequence built from the contig name, the start offset and the bases read. + * @throws PicardException if the query is malformed, too large to address with an int, or runs past the end of the contig. + */ + public ReferenceSequence getSubsequenceAt( String contig, long start, long stop ) { + if(start < 0) + throw new PicardException(String.format("Malformed query; start point %d is negative",start)); + if(start > stop) + throw new PicardException(String.format("Malformed query; start point %d lies after end point %d",start,stop)); + if(start > Integer.MAX_VALUE) + throw new PicardException("Due to current ReferenceSequence limitations, a start point larger than Integer.MAX_VALUE cannot be loaded."); + if(stop - start + 1 > Integer.MAX_VALUE) + throw new PicardException("Due to current ReferenceSequence limitations, a region larger than Integer.MAX_VALUE cannot be loaded."); + FastaSequenceIndexEntry indexEntry = index.getIndexEntry(contig); - if(pos + length - 1 > indexEntry.getSize()) + if(stop >= indexEntry.getSize()) throw new PicardException("Query asks for data past end of contig"); + int length = (int)(stop - start + 1); + final int basesPerLine = indexEntry.getBasesPerLine(); final int bytesPerLine = indexEntry.getBytesPerLine(); // Start reading at the closest start-of-line to our data. 
- long readStart = indexEntry.getLocation() + (pos / basesPerLine) * bytesPerLine; - int dataOfInterestStart = pos % basesPerLine; + long readStart = indexEntry.getLocation() + (start / basesPerLine) * bytesPerLine; + int dataOfInterestStart = (int)(start % basesPerLine); byte[] accumulator = new byte[length]; int nextAccumulatorSlot = 0; @@ -158,7 +177,7 @@ public class IndexedFastaSequenceFile implements ReferenceSequenceFile { dataOfInterestStart = 0; } - return new ReferenceSequence( contig, pos, accumulator ); + return new ReferenceSequence( contig, (int)start, accumulator ); } /** diff --git a/java/test/org/broadinstitute/sting/utils/fasta/IndexedFastaSequenceFileTest.java b/java/test/org/broadinstitute/sting/utils/fasta/IndexedFastaSequenceFileTest.java index 8a0719402..300d8eb99 100755 --- a/java/test/org/broadinstitute/sting/utils/fasta/IndexedFastaSequenceFileTest.java +++ b/java/test/org/broadinstitute/sting/utils/fasta/IndexedFastaSequenceFileTest.java @@ -53,7 +53,7 @@ public class IndexedFastaSequenceFileTest extends BaseTest { @Test public void testFirstSequence() { long startTime = System.currentTimeMillis(); - ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",0,firstBasesOfChrM.length()); + ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",0,firstBasesOfChrM.length()-1); long endTime = System.currentTimeMillis(); Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chrM"); @@ -67,7 +67,7 @@ public class IndexedFastaSequenceFileTest extends BaseTest { @Test public void testFirstSequenceExtended() { long startTime = System.currentTimeMillis(); - ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",0,extendedBasesOfChrM.length()); + ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",0,extendedBasesOfChrM.length()-1); long endTime = System.currentTimeMillis(); Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chrM"); @@ -84,7 +84,9 @@ 
public class IndexedFastaSequenceFileTest extends BaseTest { String truncated = extendedBasesOfChrM.substring(bytesToChopOff); long startTime = System.currentTimeMillis(); - ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM", bytesToChopOff ,truncated.length() ); + ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM", + bytesToChopOff, + bytesToChopOff + truncated.length() - 1); long endTime = System.currentTimeMillis(); Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chrM"); @@ -101,7 +103,9 @@ public class IndexedFastaSequenceFileTest extends BaseTest { String truncated = extendedBasesOfChrM.substring(bytesToChopOff); long startTime = System.currentTimeMillis(); - ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM", bytesToChopOff, truncated.length() ); + ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM", + bytesToChopOff, + bytesToChopOff + truncated.length() - 1); long endTime = System.currentTimeMillis(); Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chrM"); @@ -133,7 +137,7 @@ public class IndexedFastaSequenceFileTest extends BaseTest { public void testReadThroughEndOfContig() { long startTime = System.currentTimeMillis(); try { - ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",16500,100); + ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",16500,16600); } finally { long endTime = System.currentTimeMillis(); @@ -145,7 +149,7 @@ public class IndexedFastaSequenceFileTest extends BaseTest { public void testReadPastEndOfContig() { long startTime = System.currentTimeMillis(); try { - ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",16800,100); + ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",16800,16900); } finally { long endTime = System.currentTimeMillis(); @@ -193,7 +197,7 @@ public class IndexedFastaSequenceFileTest extends BaseTest { @Test public void 
testFirstOfChr1() { long startTime = System.currentTimeMillis(); - ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chr1",0,firstBasesOfChr1.length()); + ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chr1",0,firstBasesOfChr1.length()-1); long endTime = System.currentTimeMillis(); Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chr1"); @@ -207,7 +211,7 @@ public class IndexedFastaSequenceFileTest extends BaseTest { @Test public void testFirstOfChr8() { long startTime = System.currentTimeMillis(); - ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chr8",0,firstBasesOfChr8.length()); + ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chr8",0,firstBasesOfChr8.length()-1); long endTime = System.currentTimeMillis(); Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chr8");