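Fixing output printing issues in the code (traversal progress is now printed even when no SAM reading tracker is present; ThreadedIterator state printing is disabled unless a print frequency is set), as well as adding more safety checks (ReferenceIterator now throws on a seek to a contig that comes before the current one, which usually indicates out-of-order reads)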

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@105 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-03-19 23:02:49 +00:00
parent 6fdd622160
commit 34ee48fd82
4 changed files with 29 additions and 18 deletions

View File

@ -237,12 +237,6 @@ public class TraversalEngine {
* @param loc Current location
*/
public void printProgress( boolean mustPrint, final String type, GenomeLoc loc ) {
// If an index is enabled, file read progress is meaningless because a linear
// traversal is not being performed. For now, don't bother printing progress.
// TODO: Create a sam indexed read tracker that tracks based on percentage through the query.
if( samReadingTracker == null )
return;
final long nRecords = this.nRecords;
final long curTime = System.currentTimeMillis();
final double elapsed = (curTime - startTime) / 1000.0;
@ -257,7 +251,11 @@ public class TraversalEngine {
System.out.printf("[PROGRESS] Traversed %,d %s in %.2f secs (%.2f secs per 1M %s)%n", nRecords, type, elapsed, secsPer1MReads, type);
// Currently samReadingTracker will print misleading info if we're not processing the whole file
if ( this.locs == null )
// If an index is enabled, file read progress is meaningless because a linear
// traversal is not being performed. For now, don't bother printing progress.
// TODO: Create a sam indexed read tracker that tracks based on percentage through the query.
if ( samReadingTracker != null && this.locs == null )
System.out.printf("[PROGRESS] -> %s%n", samReadingTracker.progressMeter());
}
}

View File

@ -112,7 +112,13 @@ public class ReferenceIterator implements Iterator<ReferenceIterator> {
next();
//System.out.printf(" -> Seeking to %s %d from %s %d%n", contigName, seekOffset, currentContig.getName(), offset);
if ( contigName.equals(currentContig.getName()) ) {
int cmpContigs = GenomeLoc.compareContigs(contigName, currentContig.getName());
if ( cmpContigs == -1 ) {
// The contig we are looking for is before the currentContig -- it's an error
throw new IllegalArgumentException(String.format("Invalid seek to %s from %s, which is usually due to out of order reads%n",
new GenomeLoc(currentContig.getName(), seekOffset), new GenomeLoc(currentContig.getName(), offset)));
}
else if ( cmpContigs == 0 ) {
// we're somewhere on this contig
if ( seekOffset < offset || seekOffset >= currentContig.length() ) {
// bad boy -- can't go backward safely or just beyond the contig length
@ -134,7 +140,7 @@ public class ReferenceIterator implements Iterator<ReferenceIterator> {
// never found anything
return null;
}
else if ( nextContig.getName().equals(contigName) ) {
else if ( GenomeLoc.compareContigs( nextContig.getName(), contigName ) == 0 ) {
swapNextContig();
return seekForward(contigName, seekOffset);
}

View File

@ -16,6 +16,7 @@ public class ThreadedIterator<T> implements Iterator<T>, Runnable {
private Iterator<T> it;
private final BlockingQueue<T> queue;
private int nOps = 0;
private final int printStateFreq = -1;
public void run() {
try {
@ -30,7 +31,7 @@ public class ThreadedIterator<T> implements Iterator<T>, Runnable {
}
public synchronized void printState(final String op) {
if ( nOps++ % 100000 == 0 )
if ( printStateFreq != -1 && nOps++ % printStateFreq == 0 )
System.out.printf(" [%s] Queue has %d elements %d ops%n", op, queue.size(), nOps);
}

View File

@ -3,36 +3,42 @@ import os.path
import sys
import getopt
defaultCommands = ['CountLoci', 'Pileup']
def usage():
print "Optional arguments:"
print " -f QUEUE Farm jobs to QUEUE on LSF"
print " -c cmd1,cmd2 Walkers to execute, otherwise", ' '.join(defaultCommands)
if __name__ == "__main__":
opts = None
try:
opts, args = getopt.getopt(sys.argv[1:], "f:", ["farm"])
opts, args = getopt.getopt(sys.argv[1:], "f:c:", ["farm", "commands"])
except getopt.GetoptError:
print sys.argv
usage()
sys.exit(2)
farm_sub = False
commandsList = defaultCommands
for opt, arg in opts:
if opt in ("-f", "--farm"):
farm_sub = arg
if opt in ("-c", "--commands"):
commandsList = arg.split(',')
for line in open(sys.argv[1]):
for line in open(args[0]):
lane = line.strip()
head, lane_filename = os.path.split(lane)
filebase = os.path.splitext(lane_filename)[0]
# convert the fasta
for analysis in ['CountLoci', 'Pileup']:
for analysis in commandsList:
output = filebase + '.' + analysis + '.output'
if not os.path.exists(output):
cmd = "java -jar ~/dev/GenomeAnalysisTK/trunk/playground/java/dist/GenomeAnalysisTK.jar T=" + analysis + " I= " + lane + " R= /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"
print cmd
farm_commands.cmd(cmd, farm_sub, output, just_print_commands=True)
#if not os.path.exists(output):
cmd = "java -jar ~/dev/GenomeAnalysisTK/trunk/playground/java/dist/GenomeAnalysisTK.jar T=" + analysis + " I= " + lane + " R= /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta "
print cmd
farm_commands.cmd(cmd, farm_sub, output)