2009-04-12 05:10:42 +08:00
|
|
|
package org.broadinstitute.sting.utils;
|
|
|
|
|
|
2009-04-15 01:11:38 +08:00
|
|
|
import edu.mit.broad.picard.reference.ReferenceSequence;
|
|
|
|
|
import net.sf.samtools.util.StringUtil;
|
|
|
|
|
import org.broadinstitute.sting.BaseTest;
|
2009-04-13 02:05:56 +08:00
|
|
|
import org.junit.*;
|
2009-04-12 05:10:42 +08:00
|
|
|
|
|
|
|
|
import java.io.File;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Created by IntelliJ IDEA.
|
|
|
|
|
* User: mhanna
|
|
|
|
|
* Date: Apr 11, 2009
|
|
|
|
|
* Time: 2:32:52 PM
|
|
|
|
|
*/
|
2009-04-15 01:11:38 +08:00
|
|
|
public class FastaSequenceFile2Test extends BaseTest {
|
2009-04-12 05:10:42 +08:00
|
|
|
|
2009-04-15 01:11:38 +08:00
|
|
|
private static String sequenceFileName;
|
2009-04-12 05:10:42 +08:00
|
|
|
private FastaSequenceFile2 sequenceFile = null;
|
|
|
|
|
|
|
|
|
|
private final String firstBasesOfChrM = "GATCACAGGTCTATCACCCT";
|
|
|
|
|
private final String firstBasesOfChr1 = "taaccctaaccctaacccta";
|
|
|
|
|
private final String firstBasesOfChr8 = "GCAATTATGACACAAAAAAT";
|
|
|
|
|
|
2009-04-13 02:05:56 +08:00
|
|
|
@BeforeClass
|
|
|
|
|
public static void initialize() {
|
2009-04-15 01:11:38 +08:00
|
|
|
sequenceFileName = seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta";
|
2009-04-13 02:05:56 +08:00
|
|
|
}
|
|
|
|
|
|
2009-04-12 05:10:42 +08:00
|
|
|
@Before
|
|
|
|
|
public void doForEachTest() {
|
|
|
|
|
sequenceFile = new FastaSequenceFile2( new File(sequenceFileName) );
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Tears down the test fixture after each call.
|
|
|
|
|
* <p/>
|
|
|
|
|
* Called after every test case method.
|
|
|
|
|
*/
|
|
|
|
|
@After
|
|
|
|
|
public void undoForEachTest() {
|
|
|
|
|
sequenceFile = null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testOpenFile() {
|
2009-04-13 02:05:56 +08:00
|
|
|
long startTime = System.currentTimeMillis();
|
2009-04-12 05:10:42 +08:00
|
|
|
Assert.assertNotNull( sequenceFile );
|
2009-04-13 02:05:56 +08:00
|
|
|
long endTime = System.currentTimeMillis();
|
|
|
|
|
|
|
|
|
|
System.err.printf("testOpenFile runtime: %dms%n", (endTime - startTime)) ;
|
2009-04-12 05:10:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testFirstSequence() {
|
2009-04-13 02:05:56 +08:00
|
|
|
long startTime = System.currentTimeMillis();
|
2009-04-12 05:10:42 +08:00
|
|
|
ReferenceSequence sequence = sequenceFile.nextSequence();
|
|
|
|
|
Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chrM");
|
|
|
|
|
Assert.assertEquals( "First n bases of chrM are incorrect",
|
|
|
|
|
StringUtil.bytesToString( sequence.getBases(), 0, firstBasesOfChrM.length() ),
|
|
|
|
|
firstBasesOfChrM );
|
2009-04-13 02:05:56 +08:00
|
|
|
long endTime = System.currentTimeMillis();
|
|
|
|
|
|
|
|
|
|
System.err.printf("testFirstSequence runtime: %dms%n", (endTime - startTime)) ;
|
2009-04-12 05:10:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testNextSequence() {
|
2009-04-13 02:05:56 +08:00
|
|
|
long startTime = System.currentTimeMillis();
|
|
|
|
|
|
2009-04-12 05:10:42 +08:00
|
|
|
ReferenceSequence sequence = null;
|
|
|
|
|
|
|
|
|
|
// Advance to chrM.
|
|
|
|
|
sequence = sequenceFile.nextSequence();
|
|
|
|
|
sequence = sequenceFile.nextSequence();
|
|
|
|
|
|
|
|
|
|
Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chr1");
|
|
|
|
|
|
|
|
|
|
// Workaround: bytesToString for chr1 of the fasta file we've picked doesn't appear to work.
|
|
|
|
|
// TODO: Report this as sam-jdk bug.
|
|
|
|
|
byte[] firstOfChr1 = StringUtil.stringToBytes(firstBasesOfChr1);
|
|
|
|
|
byte[] firstOfSequence = new byte[firstBasesOfChr1.length()];
|
|
|
|
|
System.arraycopy(sequence.getBases(), 0, firstOfSequence, 0, firstOfSequence.length );
|
|
|
|
|
|
|
|
|
|
Assert.assertArrayEquals("First bases of chr1 are not correct", firstOfChr1, firstOfSequence );
|
2009-04-13 02:05:56 +08:00
|
|
|
|
|
|
|
|
long endTime = System.currentTimeMillis();
|
|
|
|
|
|
|
|
|
|
System.err.printf("testNextSequence runtime: %dms%n", (endTime - startTime)) ;
|
2009-04-12 05:10:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testSeekToSequence() {
|
2009-04-13 02:05:56 +08:00
|
|
|
long startTime = System.currentTimeMillis();
|
|
|
|
|
|
2009-04-12 05:10:42 +08:00
|
|
|
boolean success = sequenceFile.seekToContig("chr8");
|
|
|
|
|
Assert.assertTrue("Seek to seq chr8 failed", success );
|
|
|
|
|
|
|
|
|
|
ReferenceSequence sequence = sequenceFile.nextSequence();
|
|
|
|
|
Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chr8");
|
|
|
|
|
Assert.assertEquals( "First n bases of chrc are incorrect",
|
|
|
|
|
StringUtil.bytesToString( sequence.getBases(), 0, firstBasesOfChr8.length() ),
|
|
|
|
|
firstBasesOfChr8 );
|
2009-04-13 02:05:56 +08:00
|
|
|
|
|
|
|
|
long endTime = System.currentTimeMillis();
|
|
|
|
|
|
|
|
|
|
System.err.printf("testSeekToSequence runtime: %dms%n", (endTime - startTime)) ;
|
2009-04-12 05:10:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TODO: Is NullPointerException *really* the right exception when a sequence is missing?
|
|
|
|
|
@Test(expected=NullPointerException.class)
|
|
|
|
|
public void testSeekToMissingSequence() {
|
2009-04-13 02:05:56 +08:00
|
|
|
long startTime = 0L, endTime = 0L;
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
startTime = System.currentTimeMillis();
|
2009-04-15 01:11:38 +08:00
|
|
|
boolean success = sequenceFile.seekToContig("absent");
|
2009-04-13 02:05:56 +08:00
|
|
|
}
|
|
|
|
|
finally {
|
|
|
|
|
endTime = System.currentTimeMillis();
|
|
|
|
|
System.err.printf("testSeekToMissingSequence runtime: %dms%n", (endTime - startTime)) ;
|
|
|
|
|
}
|
2009-04-12 05:10:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testSeekBackward() {
|
2009-04-13 02:05:56 +08:00
|
|
|
long startTime = System.currentTimeMillis();
|
|
|
|
|
|
2009-04-12 05:10:42 +08:00
|
|
|
boolean success = sequenceFile.seekToContig("chr9");
|
|
|
|
|
Assert.assertTrue("Unable to seek to contig 'chr9'", success);
|
|
|
|
|
|
|
|
|
|
success = sequenceFile.seekToContig("chr8",true);
|
|
|
|
|
Assert.assertTrue("Unable to seek backward to contig 'chr8'", success);
|
|
|
|
|
|
|
|
|
|
ReferenceSequence sequence = sequenceFile.nextSequence();
|
|
|
|
|
Assert.assertEquals("First sequence contig is not correct", sequence.getName(), "chr8");
|
|
|
|
|
Assert.assertEquals( "First n bases of chrc are incorrect",
|
|
|
|
|
StringUtil.bytesToString( sequence.getBases(), 0, firstBasesOfChr8.length() ),
|
2009-04-13 02:05:56 +08:00
|
|
|
firstBasesOfChr8 );
|
|
|
|
|
|
|
|
|
|
long endTime = System.currentTimeMillis();
|
|
|
|
|
|
|
|
|
|
System.err.printf("testSeekBackward runtime: %dms%n", (endTime - startTime)) ;
|
2009-04-12 05:10:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testInvalidSeekBackward() {
|
2009-04-13 02:05:56 +08:00
|
|
|
long startTime = System.currentTimeMillis();
|
|
|
|
|
|
2009-04-12 05:10:42 +08:00
|
|
|
boolean success = sequenceFile.seekToContig("chr9");
|
|
|
|
|
Assert.assertTrue("Unable to seek to contig 'chr9'", success);
|
|
|
|
|
|
|
|
|
|
success = sequenceFile.seekToContig("chr8");
|
|
|
|
|
Assert.assertFalse("Unable to seek backward to contig 'chr8'", success);
|
2009-04-13 02:05:56 +08:00
|
|
|
|
|
|
|
|
long endTime = System.currentTimeMillis();
|
|
|
|
|
|
|
|
|
|
System.err.printf("testInvalidSeekBackward runtime: %dms%n", (endTime - startTime)) ;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testSimultaneousAccess() {
|
|
|
|
|
long startTime = System.currentTimeMillis();
|
|
|
|
|
|
2009-04-13 23:16:42 +08:00
|
|
|
// FastaSequenceFile2 other = (FastaSequenceFile2)sequenceFile.clone();
|
2009-04-13 02:05:56 +08:00
|
|
|
|
|
|
|
|
sequenceFile.seekToContig("chr1");
|
|
|
|
|
ReferenceSequence chr1 = sequenceFile.nextSequence();
|
|
|
|
|
|
|
|
|
|
// other.seekToContig("chr8");
|
|
|
|
|
// ReferenceSequence chr8 = other.nextSequence();
|
|
|
|
|
|
|
|
|
|
// System.err.printf( "sequenceFile contig: %s%n", sequenceFile.getContigName() );
|
|
|
|
|
// System.err.printf( "other contig: %s%n", other.getContigName() );
|
|
|
|
|
|
|
|
|
|
long endTime = System.currentTimeMillis();
|
|
|
|
|
|
|
|
|
|
System.err.printf("testSimultaneousAccess runtime: %dms%n", (endTime - startTime)) ;
|
2009-04-12 05:10:42 +08:00
|
|
|
}
|
|
|
|
|
}
|