package org.broadinstitute.sting.gatk;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
import static org.junit.Assert.fail;
import org.junit.Test;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
/**
*
* User: aaron
* Date: Apr 28, 2009
* Time: 5:21:29 PM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
* @author aaron
* @version 1.0
* @date Apr 28, 2009
*
* Class GenomeAnalysisTKTest
*
* A quick move of the unit test cases that were stuffed into GenomeAnalysisTK.
*/
public class GenomeAnalysisTKTest extends BaseTest {
// our sequence dictionary to check
private final File refFileName = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/Homo_sapiens_assembly17.fasta");
// skip n number of chromesomes when we do a chromesome by chromesome jumping
private final int skipChrome = 15;
/**
* This has been blindly moved out of the GenomeAnalysisTK.java where it was hanging on,
* but the author makes very limited promises of any functionality
*
*/
@Test
public void testNewReferenceFeatures() {
final FastaSequenceFile2 refFile = new FastaSequenceFile2(refFileName);
GenomeLoc.setupRefContigOrdering(refFile);
List refContigs = refFile.getSequenceDictionary().getSequences();
/*
for ( SAMSequenceRecord refContig: refContigs ) {
System.out.printf(" Traversing from chr1 to %s would require jumping %d bytes%n",
refContig.getSequenceName(), refFile.getDistanceBetweenContigs("chr1", refContig.getSequenceName()));
}
*/
String lastContig = null;
List timings = new ArrayList();
for ( SAMSequenceRecord startContig : refFile.getSequenceDictionary().getSequences() ) {
final String startContigName = startContig.getSequenceName();
int skip = 1;
for ( SAMSequenceRecord targetContig : refFile.getSequenceDictionary().getSequences() ) {
if (skipChrome != skip) {
++skip;
continue;
} else {
skip = 1;
}
refFile.seekToContig(startContigName, true);
logger.warn(String.format("Seeking: current=%s, target=%s", startContigName, targetContig.getSequenceName()));
long lastTime = System.currentTimeMillis();
final boolean success = refFile.seekToContig(targetContig.getSequenceName(), true);
long curTime = System.currentTimeMillis();
final double elapsed = (curTime - lastTime) / 1000.0;
timings.add(elapsed);
logger.warn(String.format(" -> Elapsed time %.2f, averaging %.2f sec / seek for %d seeks",
elapsed, Utils.averageDouble(timings), timings.size()));
if ( ! success ) {
fail(String.format("Failured to seek to %s from %s", targetContig.getSequenceName(), lastContig ));
}
//System.exit(1);
}
}
// code for randomly sampling the seeks
// Random rnd = new Random();
// String lastContig = null;
// List timings = new ArrayList();
// final int N_SAMPLES = 1000;
// //try { refFile.seekToContig("chr3"); } catch ( IOException e ) {}
// for ( int i = 0; i < N_SAMPLES; i++ ) {
// final int nextIndex = rnd.nextInt(refContigs.size());
// String nextContig = refFile.getSequenceDictionary().getSequence(nextIndex).getSequenceName();
// //nextContig = "chr2";
// try {
// System.out.printf("Seeking: current=%s, target=%s%n", refFile.getContigName(), nextContig);
// long lastTime = System.currentTimeMillis();
// final boolean success = refFile.seekToContig(nextContig, true);
// long curTime = System.currentTimeMillis();
// final double elapsed = (curTime - lastTime) / 1000.0;
// timings.add(elapsed);
// System.out.printf(" -> Elapsed time %.2f, averaging %.2f sec / seek for %d seeks%n",
// elapsed, Utils.averageDouble(timings), timings.size());
//
// if ( ! success ) {
// System.out.printf("Failured to seek to %s from %s%n", nextContig, lastContig );
// }
// //System.exit(1);
// } catch ( IOException e ) {
// System.out.printf("Failured to seek to %s from %s%n", nextContig, lastContig );
// e.printStackTrace();
// }
//
// lastContig = nextContig;
// }
// System.exit(1);
/*
final String targetChr = "chr10";
try {
refFile.seekToContig(targetChr);
} catch ( IOException e ){
System.out.printf("Failured to seek to %s%n", targetChr);
e.printStackTrace();
}
System.exit(1);
//List timings = new ArrayList();
final long startTime = System.currentTimeMillis();
long lastTime = System.currentTimeMillis();
int i = 0;
String prevNextContigName = null;
logger.info(String.format("Walking reference sequence:%n"));
for ( SAMSequenceRecord refContig: refContigs ) {
long curTime = System.currentTimeMillis();
ReferenceSequence contig = refFile.nextSequence();
final double elapsed = (curTime - lastTime) / 1000.0;
timings.add(elapsed);
logger.info(String.format("%2d : expected %s contig, found %s with next of %s after %.2f seconds, average is %.2f", i,
refContig.getSequenceName(), contig.getName(), refFile.getNextContigName(), elapsed, Utils.averageDouble(timings)));
if ( prevNextContigName != null && contig.getName() != null && ! prevNextContigName.equals(contig.getName()) )
throw new RuntimeIOException(String.format("Unexpected contig ordering %s was expected next, but I found %s?",
prevNextContigName, contig.getName()));
prevNextContigName = refFile.getNextContigName();
lastTime = curTime;
i++;
logger.info(String.format(" Traversing from chr1 to %s would require jumping %d bytes",
contig.getName(), refFile.getDistanceBetweenContigs("chr1", contig.getName())));
}
*/
}
}