Added a definitely inappropriately placed testing of the new fasta seeking system at the bottom of the file -- it's not called but it probably should be moved to somewhere more appropriate.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@141 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
611ab0bdb3
commit
1df23b0417
|
|
@ -2,20 +2,28 @@ package org.broadinstitute.sting.gatk;
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
import net.sf.samtools.util.RuntimeIOException;
|
||||||
import edu.mit.broad.picard.cmdline.CommandLineProgram;
|
import edu.mit.broad.picard.cmdline.CommandLineProgram;
|
||||||
import edu.mit.broad.picard.cmdline.Usage;
|
import edu.mit.broad.picard.cmdline.Usage;
|
||||||
import edu.mit.broad.picard.cmdline.Option;
|
import edu.mit.broad.picard.cmdline.Option;
|
||||||
import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory;
|
import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory;
|
||||||
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
|
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
|
||||||
|
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodGFF;
|
import org.broadinstitute.sting.gatk.refdata.rodGFF;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.FastaSequenceFile2;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
public class GenomeAnalysisTK extends CommandLineProgram {
|
public class GenomeAnalysisTK extends CommandLineProgram {
|
||||||
// Usage and parameters
|
// Usage and parameters
|
||||||
|
|
@ -51,6 +59,8 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
||||||
protected int doWork() {
|
protected int doWork() {
|
||||||
walkerManager = new WalkerManager(pluginPathName);
|
walkerManager = new WalkerManager(pluginPathName);
|
||||||
|
|
||||||
|
//testNewReferenceFeatures(REF_FILE_ARG);
|
||||||
|
|
||||||
final boolean TEST_ROD = false;
|
final boolean TEST_ROD = false;
|
||||||
ReferenceOrderedData[] rods = null;
|
ReferenceOrderedData[] rods = null;
|
||||||
|
|
||||||
|
|
@ -117,8 +127,9 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
||||||
engine.setSortOnFly(ENABLED_SORT_ON_FLY.toLowerCase().equals("true"));
|
engine.setSortOnFly(ENABLED_SORT_ON_FLY.toLowerCase().equals("true"));
|
||||||
|
|
||||||
engine.initialize(ENABLED_THREADED_IO.toLowerCase().equals("true"));
|
engine.initialize(ENABLED_THREADED_IO.toLowerCase().equals("true"));
|
||||||
|
|
||||||
//engine.testReference();
|
//engine.testReference();
|
||||||
|
|
||||||
//LocusWalker<Integer,Integer> walker = new PileupWalker();
|
//LocusWalker<Integer,Integer> walker = new PileupWalker();
|
||||||
|
|
||||||
// Try to get the walker specified
|
// Try to get the walker specified
|
||||||
|
|
@ -142,4 +153,116 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An inappropriately placed validation and performance testing routine for jumping
|
||||||
|
* around in the fasta sequence file.
|
||||||
|
* @param refFileName
|
||||||
|
*/
|
||||||
|
private static void testNewReferenceFeatures(final File refFileName) {
|
||||||
|
final FastaSequenceFile2 refFile = new FastaSequenceFile2(refFileName);
|
||||||
|
Utils.setupRefContigOrdering(refFile);
|
||||||
|
|
||||||
|
List<SAMSequenceRecord> refContigs = refFile.getSequenceDictionary().getSequences();
|
||||||
|
|
||||||
|
/*
|
||||||
|
for ( SAMSequenceRecord refContig: refContigs ) {
|
||||||
|
System.out.printf(" Traversing from chr1 to %s would require jumping %d bytes%n",
|
||||||
|
refContig.getSequenceName(), refFile.getDistanceBetweenContigs("chr1", refContig.getSequenceName()));
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
String lastContig = null;
|
||||||
|
List<Double> timings = new ArrayList<Double>();
|
||||||
|
for ( SAMSequenceRecord startContig : refFile.getSequenceDictionary().getSequences() ) {
|
||||||
|
final String startContigName = startContig.getSequenceName();
|
||||||
|
for ( SAMSequenceRecord targetContig : refFile.getSequenceDictionary().getSequences() ) {
|
||||||
|
refFile.seekToContig(startContigName, true);
|
||||||
|
System.out.printf("Seeking: current=%s, target=%s%n", startContigName, targetContig.getSequenceName());
|
||||||
|
long lastTime = System.currentTimeMillis();
|
||||||
|
final boolean success = refFile.seekToContig(targetContig.getSequenceName(), true);
|
||||||
|
long curTime = System.currentTimeMillis();
|
||||||
|
final double elapsed = (curTime - lastTime) / 1000.0;
|
||||||
|
timings.add(elapsed);
|
||||||
|
System.out.printf(" -> Elapsed time %.2f, averaging %.2f sec / seek for %d seeks%n",
|
||||||
|
elapsed, Utils.averageDouble(timings), timings.size());
|
||||||
|
|
||||||
|
if ( ! success ) {
|
||||||
|
System.out.printf("Failured to seek to %s from %s%n", targetContig.getSequenceName(), lastContig );
|
||||||
|
}
|
||||||
|
//System.exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
System.exit(1);
|
||||||
|
|
||||||
|
// code for randomly sampling the seeks
|
||||||
|
// Random rnd = new Random();
|
||||||
|
// String lastContig = null;
|
||||||
|
// List<Double> timings = new ArrayList<Double>();
|
||||||
|
// final int N_SAMPLES = 1000;
|
||||||
|
// //try { refFile.seekToContig("chr3"); } catch ( IOException e ) {}
|
||||||
|
// for ( int i = 0; i < N_SAMPLES; i++ ) {
|
||||||
|
// final int nextIndex = rnd.nextInt(refContigs.size());
|
||||||
|
// String nextContig = refFile.getSequenceDictionary().getSequence(nextIndex).getSequenceName();
|
||||||
|
// //nextContig = "chr2";
|
||||||
|
// try {
|
||||||
|
// System.out.printf("Seeking: current=%s, target=%s%n", refFile.getContigName(), nextContig);
|
||||||
|
// long lastTime = System.currentTimeMillis();
|
||||||
|
// final boolean success = refFile.seekToContig(nextContig, true);
|
||||||
|
// long curTime = System.currentTimeMillis();
|
||||||
|
// final double elapsed = (curTime - lastTime) / 1000.0;
|
||||||
|
// timings.add(elapsed);
|
||||||
|
// System.out.printf(" -> Elapsed time %.2f, averaging %.2f sec / seek for %d seeks%n",
|
||||||
|
// elapsed, Utils.averageDouble(timings), timings.size());
|
||||||
|
//
|
||||||
|
// if ( ! success ) {
|
||||||
|
// System.out.printf("Failured to seek to %s from %s%n", nextContig, lastContig );
|
||||||
|
// }
|
||||||
|
// //System.exit(1);
|
||||||
|
// } catch ( IOException e ) {
|
||||||
|
// System.out.printf("Failured to seek to %s from %s%n", nextContig, lastContig );
|
||||||
|
// e.printStackTrace();
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// lastContig = nextContig;
|
||||||
|
// }
|
||||||
|
// System.exit(1);
|
||||||
|
|
||||||
|
/*
|
||||||
|
final String targetChr = "chr10";
|
||||||
|
try {
|
||||||
|
refFile.seekToContig(targetChr);
|
||||||
|
} catch ( IOException e ){
|
||||||
|
System.out.printf("Failured to seek to %s%n", targetChr);
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
System.exit(1);
|
||||||
|
*/
|
||||||
|
|
||||||
|
//List<Double> timings = new ArrayList<Double>();
|
||||||
|
final long startTime = System.currentTimeMillis();
|
||||||
|
long lastTime = System.currentTimeMillis();
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
String prevNextContigName = null;
|
||||||
|
System.out.printf("Walking reference sequence:%n");
|
||||||
|
for ( SAMSequenceRecord refContig: refContigs ) {
|
||||||
|
long curTime = System.currentTimeMillis();
|
||||||
|
ReferenceSequence contig = refFile.nextSequence();
|
||||||
|
final double elapsed = (curTime - lastTime) / 1000.0;
|
||||||
|
timings.add(elapsed);
|
||||||
|
System.out.printf("%2d : expected %s contig, found %s with next of %s after %.2f seconds, average is %.2f%n", i,
|
||||||
|
refContig.getSequenceName(), contig.getName(), refFile.getNextContigName(), elapsed, Utils.averageDouble(timings));
|
||||||
|
if ( prevNextContigName != null && contig.getName() != null && ! prevNextContigName.equals(contig.getName()) )
|
||||||
|
throw new RuntimeIOException(String.format("Unexpected contig ordering %s was expected next, but I found %s?",
|
||||||
|
prevNextContigName, contig.getName()));
|
||||||
|
|
||||||
|
prevNextContigName = refFile.getNextContigName();
|
||||||
|
lastTime = curTime;
|
||||||
|
i++;
|
||||||
|
|
||||||
|
System.out.printf(" Traversing from chr1 to %s would require jumping %d bytes%n",
|
||||||
|
contig.getName(), refFile.getDistanceBetweenContigs("chr1", contig.getName()));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue