Changed the getSubsequenceAt interface from (contig, pos, length) to the more common (contig, start, stop) form, where stop is inclusive.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@441 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-04-16 00:04:41 +00:00
parent 6c9d110eb9
commit 56f6847456
2 changed files with 27 additions and 14 deletions

View File

@ -118,21 +118,30 @@ public class IndexedFastaSequenceFile implements ReferenceSequenceFile {
} }
public ReferenceSequence getSequence( String contig ) { public ReferenceSequence getSequence( String contig ) {
return getSubsequenceAt( contig, 0, (int)index.getIndexEntry(contig).getSize() ); return getSubsequenceAt( contig, 0, (int)index.getIndexEntry(contig).getSize()-1 );
} }
public ReferenceSequence getSubsequenceAt( String contig, int pos, int length ) { public ReferenceSequence getSubsequenceAt( String contig, long start, long stop ) {
if(start > stop)
throw new PicardException(String.format("Malformed query; start point %d lies after end point %d",start,stop));
if(start > Integer.MAX_VALUE)
throw new PicardException("Due to current ReferenceSequence limitations, a start point larger than Integer.MAX_VALUE cannot be loaded.");
if(stop - start + 1 > Integer.MAX_VALUE)
throw new PicardException("Due to current ReferenceSequence limitations, a region larger than Integer.MAX_VALUE cannot be loaded.");
FastaSequenceIndexEntry indexEntry = index.getIndexEntry(contig); FastaSequenceIndexEntry indexEntry = index.getIndexEntry(contig);
if(pos + length - 1 > indexEntry.getSize()) if(stop > indexEntry.getSize())
throw new PicardException("Query asks for data past end of contig"); throw new PicardException("Query asks for data past end of contig");
int length = (int)(stop - start + 1);
final int basesPerLine = indexEntry.getBasesPerLine(); final int basesPerLine = indexEntry.getBasesPerLine();
final int bytesPerLine = indexEntry.getBytesPerLine(); final int bytesPerLine = indexEntry.getBytesPerLine();
// Start reading at the closest start-of-line to our data. // Start reading at the closest start-of-line to our data.
long readStart = indexEntry.getLocation() + (pos / basesPerLine) * bytesPerLine; long readStart = indexEntry.getLocation() + (start / basesPerLine) * bytesPerLine;
int dataOfInterestStart = pos % basesPerLine; int dataOfInterestStart = (int)(start % basesPerLine);
byte[] accumulator = new byte[length]; byte[] accumulator = new byte[length];
int nextAccumulatorSlot = 0; int nextAccumulatorSlot = 0;
@ -158,7 +167,7 @@ public class IndexedFastaSequenceFile implements ReferenceSequenceFile {
dataOfInterestStart = 0; dataOfInterestStart = 0;
} }
return new ReferenceSequence( contig, pos, accumulator ); return new ReferenceSequence( contig, (int)start, accumulator );
} }
/** /**

View File

@ -53,7 +53,7 @@ public class IndexedFastaSequenceFileTest extends BaseTest {
@Test @Test
public void testFirstSequence() { public void testFirstSequence() {
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",0,firstBasesOfChrM.length()); ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",0,firstBasesOfChrM.length()-1);
long endTime = System.currentTimeMillis(); long endTime = System.currentTimeMillis();
Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chrM"); Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chrM");
@ -67,7 +67,7 @@ public class IndexedFastaSequenceFileTest extends BaseTest {
@Test @Test
public void testFirstSequenceExtended() { public void testFirstSequenceExtended() {
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",0,extendedBasesOfChrM.length()); ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",0,extendedBasesOfChrM.length()-1);
long endTime = System.currentTimeMillis(); long endTime = System.currentTimeMillis();
Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chrM"); Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chrM");
@ -84,7 +84,9 @@ public class IndexedFastaSequenceFileTest extends BaseTest {
String truncated = extendedBasesOfChrM.substring(bytesToChopOff); String truncated = extendedBasesOfChrM.substring(bytesToChopOff);
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM", bytesToChopOff ,truncated.length() ); ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",
bytesToChopOff,
bytesToChopOff + truncated.length() - 1);
long endTime = System.currentTimeMillis(); long endTime = System.currentTimeMillis();
Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chrM"); Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chrM");
@ -101,7 +103,9 @@ public class IndexedFastaSequenceFileTest extends BaseTest {
String truncated = extendedBasesOfChrM.substring(bytesToChopOff); String truncated = extendedBasesOfChrM.substring(bytesToChopOff);
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM", bytesToChopOff, truncated.length() ); ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",
bytesToChopOff,
bytesToChopOff + truncated.length() - 1);
long endTime = System.currentTimeMillis(); long endTime = System.currentTimeMillis();
Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chrM"); Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chrM");
@ -133,7 +137,7 @@ public class IndexedFastaSequenceFileTest extends BaseTest {
public void testReadThroughEndOfContig() { public void testReadThroughEndOfContig() {
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
try { try {
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",16500,100); ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",16500,16600);
} }
finally { finally {
long endTime = System.currentTimeMillis(); long endTime = System.currentTimeMillis();
@ -145,7 +149,7 @@ public class IndexedFastaSequenceFileTest extends BaseTest {
public void testReadPastEndOfContig() { public void testReadPastEndOfContig() {
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
try { try {
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",16800,100); ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chrM",16800,16900);
} }
finally { finally {
long endTime = System.currentTimeMillis(); long endTime = System.currentTimeMillis();
@ -193,7 +197,7 @@ public class IndexedFastaSequenceFileTest extends BaseTest {
@Test @Test
public void testFirstOfChr1() { public void testFirstOfChr1() {
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chr1",0,firstBasesOfChr1.length()); ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chr1",0,firstBasesOfChr1.length()-1);
long endTime = System.currentTimeMillis(); long endTime = System.currentTimeMillis();
Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chr1"); Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chr1");
@ -207,7 +211,7 @@ public class IndexedFastaSequenceFileTest extends BaseTest {
@Test @Test
public void testFirstOfChr8() { public void testFirstOfChr8() {
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chr8",0,firstBasesOfChr8.length()); ReferenceSequence sequence = sequenceFile.getSubsequenceAt("chr8",0,firstBasesOfChr8.length()-1);
long endTime = System.currentTimeMillis(); long endTime = System.currentTimeMillis();
Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chr8"); Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chr8");