diff --git a/core/java/src/org/broadinstitute/sting/gatk/TraversalEngine.java b/core/java/src/org/broadinstitute/sting/gatk/TraversalEngine.java index 9ca8dadeb..5ac519ce0 100755 --- a/core/java/src/org/broadinstitute/sting/gatk/TraversalEngine.java +++ b/core/java/src/org/broadinstitute/sting/gatk/TraversalEngine.java @@ -237,12 +237,6 @@ public class TraversalEngine { * @param loc Current location */ public void printProgress( boolean mustPrint, final String type, GenomeLoc loc ) { - // If an index is enabled, file read progress is meaningless because a linear - // traversal is not being performed. For now, don't bother printing progress. - // TODO: Create a sam indexed read tracker that tracks based on percentage through the query. - if( samReadingTracker == null ) - return; - final long nRecords = this.nRecords; final long curTime = System.currentTimeMillis(); final double elapsed = (curTime - startTime) / 1000.0; @@ -257,7 +251,11 @@ public class TraversalEngine { System.out.printf("[PROGRESS] Traversed %,d %s in %.2f secs (%.2f secs per 1M %s)%n", nRecords, type, elapsed, secsPer1MReads, type); // Currently samReadingTracker will print misleading info if we're not processing the whole file - if ( this.locs == null ) + + // If an index is enabled, file read progress is meaningless because a linear + // traversal is not being performed. Skip only the samReadingTracker line when the tracker is null. + // TODO: Create a sam indexed read tracker that tracks based on percentage through the query. 
+ if ( samReadingTracker != null && this.locs == null ) System.out.printf("[PROGRESS] -> %s%n", samReadingTracker.progressMeter()); } } diff --git a/core/java/src/org/broadinstitute/sting/gatk/iterators/ReferenceIterator.java b/core/java/src/org/broadinstitute/sting/gatk/iterators/ReferenceIterator.java index 7f7909543..e116117bb 100755 --- a/core/java/src/org/broadinstitute/sting/gatk/iterators/ReferenceIterator.java +++ b/core/java/src/org/broadinstitute/sting/gatk/iterators/ReferenceIterator.java @@ -112,7 +112,13 @@ public class ReferenceIterator implements Iterator { next(); //System.out.printf(" -> Seeking to %s %d from %s %d%n", contigName, seekOffset, currentContig.getName(), offset); - if ( contigName.equals(currentContig.getName()) ) { + int cmpContigs = GenomeLoc.compareContigs(contigName, currentContig.getName()); + if ( cmpContigs < 0 ) { + // The requested contig sorts before currentContig and we can't go backward safely -- report the requested location, not the current one + throw new IllegalArgumentException(String.format("Invalid seek to %s from %s, which is usually due to out of order reads%n", + new GenomeLoc(contigName, seekOffset), new GenomeLoc(currentContig.getName(), offset))); + } + else if ( cmpContigs == 0 ) { // we're somewhere on this contig if ( seekOffset < offset || seekOffset >= currentContig.length() ) { // bad boy -- can't go backward safely or just beyond the contig length @@ -134,7 +140,7 @@ public class ReferenceIterator implements Iterator { // never found anything return null; } - else if ( nextContig.getName().equals(contigName) ) { + else if ( GenomeLoc.compareContigs( nextContig.getName(), contigName ) == 0 ) { swapNextContig(); return seekForward(contigName, seekOffset); } diff --git a/core/java/src/org/broadinstitute/sting/gatk/iterators/ThreadedIterator.java b/core/java/src/org/broadinstitute/sting/gatk/iterators/ThreadedIterator.java index 2866cd041..10c74a340 100755 --- a/core/java/src/org/broadinstitute/sting/gatk/iterators/ThreadedIterator.java +++ 
b/core/java/src/org/broadinstitute/sting/gatk/iterators/ThreadedIterator.java @@ -16,6 +16,7 @@ public class ThreadedIterator implements Iterator, Runnable { private Iterator it; private final BlockingQueue queue; private int nOps = 0; + private final int printStateFreq = -1; // number of ops between queue-state printouts; values <= 0 disable printing public void run() { try { @@ -30,7 +31,7 @@ public class ThreadedIterator implements Iterator, Runnable { } public synchronized void printState(final String op) { - if ( nOps++ % 100000 == 0 ) + if ( printStateFreq > 0 && nOps++ % printStateFreq == 0 ) System.out.printf(" [%s] Queue has %d elements %d ops%n", op, queue.size(), nOps); } diff --git a/playground/python/StressTestGATK.py b/playground/python/StressTestGATK.py index de7b2089b..3eb7aabae 100755 --- a/playground/python/StressTestGATK.py +++ b/playground/python/StressTestGATK.py @@ -3,36 +3,42 @@ import os.path import sys import getopt +defaultCommands = ['CountLoci', 'Pileup'] + def usage(): print "Optional arguments:" print " -f QUEUE Farm jobs to QUEUE on LSF" - + print " -c cmd1,cmd2 Walkers to execute, otherwise", ' '.join(defaultCommands) + if __name__ == "__main__": opts = None try: - opts, args = getopt.getopt(sys.argv[1:], "f:", ["farm"]) + opts, args = getopt.getopt(sys.argv[1:], "f:c:", ["farm=", "commands="]) # trailing '=' makes the long options take a value, like -f/-c except getopt.GetoptError: print sys.argv usage() sys.exit(2) farm_sub = False + commandsList = defaultCommands for opt, arg in opts: if opt in ("-f", "--farm"): farm_sub = arg + if opt in ("-c", "--commands"): + commandsList = arg.split(',') - for line in open(sys.argv[1]): + for line in open(args[0]): lane = line.strip() head, lane_filename = os.path.split(lane) filebase = os.path.splitext(lane_filename)[0] # convert the fasta - for analysis in ['CountLoci', 'Pileup']: + for analysis in commandsList: output = filebase + '.' 
+ analysis + '.output' - if not os.path.exists(output): - cmd = "java -jar ~/dev/GenomeAnalysisTK/trunk/playground/java/dist/GenomeAnalysisTK.jar T=" + analysis + " I= " + lane + " R= /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta" - print cmd - farm_commands.cmd(cmd, farm_sub, output, just_print_commands=True) + # Existence check disabled -- the command below is issued even when the output file already exists: if not os.path.exists(output): + cmd = "java -jar ~/dev/GenomeAnalysisTK/trunk/playground/java/dist/GenomeAnalysisTK.jar T=" + analysis + " I= " + lane + " R= /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta " + print cmd + farm_commands.cmd(cmd, farm_sub, output)