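Replaced the Utils.warnUser / Utils.scareUser calls in TraversalEngine with log4j logger calls; the former scareUser sites now log at fatal level and throw an exception.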

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@156 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-03-23 20:27:21 +00:00
parent 5cdc5dffc6
commit d174417fbd
1 changed files with 182 additions and 173 deletions

View File

@ -1,30 +1,33 @@
package org.broadinstitute.sting.gatk; package org.broadinstitute.sting.gatk;
import net.sf.samtools.*;
import net.sf.samtools.SAMFileReader.ValidationStringency;
import net.sf.samtools.util.RuntimeIOException;
import edu.mit.broad.picard.filter.SamRecordFilter;
import edu.mit.broad.picard.filter.FilteringIterator; import edu.mit.broad.picard.filter.FilteringIterator;
import edu.mit.broad.picard.reference.ReferenceSequenceFile; import edu.mit.broad.picard.filter.SamRecordFilter;
import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory;
import edu.mit.broad.picard.reference.ReferenceSequence; import edu.mit.broad.picard.reference.ReferenceSequence;
import org.broadinstitute.sting.utils.*; import net.sf.functionalj.Function1;
import net.sf.functionalj.FunctionN;
import net.sf.functionalj.Functions;
import net.sf.functionalj.reflect.JdkStdReflect;
import net.sf.functionalj.reflect.StdReflect;
import net.sf.functionalj.util.Operators;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileReader.ValidationStringency;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.RuntimeIOException;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.iterators.*; import org.broadinstitute.sting.gatk.iterators.*;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.FastaSequenceFile2;
import org.broadinstitute.sting.utils.FileProgressTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import java.io.*; import java.io.*;
import java.util.*; import java.util.*;
import net.sf.functionalj.reflect.StdReflect;
import net.sf.functionalj.reflect.JdkStdReflect;
import net.sf.functionalj.FunctionN;
import net.sf.functionalj.Function1;
import net.sf.functionalj.Functions;
import net.sf.functionalj.util.Operators;
public class TraversalEngine { public class TraversalEngine {
// list of reference ordered data objects // list of reference ordered data objects
private List<ReferenceOrderedData> rods = null; private List<ReferenceOrderedData> rods = null;
@ -83,6 +86,12 @@ public class TraversalEngine {
public long N_RECORDS_TO_PRINT = 100000; public long N_RECORDS_TO_PRINT = 100000;
public int THREADED_IO_BUFFER_SIZE = 10000; public int THREADED_IO_BUFFER_SIZE = 10000;
/**
 * Logger for messages emitted by this class. It is keyed on TraversalEngine,
 * the class that declares it, so that its output is attributed to (and can be
 * configured for) this class rather than another one.
 */
private static final Logger logger = Logger.getLogger(TraversalEngine.class);
// Locations we are going to process during the traversal // Locations we are going to process during the traversal
private GenomeLoc[] locs = null; private GenomeLoc[] locs = null;
@ -112,14 +121,24 @@ public class TraversalEngine {
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
//public void setRegion(final String reg) { regionStr = regionStr; } //public void setRegion(final String reg) { regionStr = regionStr; }
//public void setTraversalType(final String type) { traversalType = type; } //public void setTraversalType(final String type) { traversalType = type; }
public void setStrictness( final ValidationStringency s ) { strictness = s; } public void setStrictness(final ValidationStringency s) {
public void setMaxReads( final int maxReads ) { this.maxReads = maxReads; } strictness = s;
public void setDebugging( final boolean d ) { DEBUGGING = d; } }
public void setMaxReads(final int maxReads) {
this.maxReads = maxReads;
}
public void setDebugging(final boolean d) {
DEBUGGING = d;
}
public void setSafetyChecking(final boolean beSafeP) { public void setSafetyChecking(final boolean beSafeP) {
if (!beSafeP) if (!beSafeP)
System.out.printf("*** Turning off safety checking, I hope you know what you are doing. Errors will result in debugging assert failures and other inscrutable messages...%n"); System.out.printf("*** Turning off safety checking, I hope you know what you are doing. Errors will result in debugging assert failures and other inscrutable messages...%n");
this.beSafeP = beSafeP; this.beSafeP = beSafeP;
} }
public void setSortOnFly(final boolean SORT_ON_FLY) { public void setSortOnFly(final boolean SORT_ON_FLY) {
if (SORT_ON_FLY) if (SORT_ON_FLY)
System.out.println("Sorting read file on the fly: max reads allowed is " + MAX_ON_FLY_SORTS); System.out.println("Sorting read file on the fly: max reads allowed is " + MAX_ON_FLY_SORTS);
@ -154,29 +173,26 @@ public class TraversalEngine {
* *
* @param file_name * @param file_name
*/ */
public void setLocationFromFile( final String file_name ) public void setLocationFromFile(final String file_name) {
{
String locStr = ""; String locStr = "";
Scanner scanner = null; Scanner scanner = null;
try try {
{
scanner = new Scanner(new File(file_name)); scanner = new Scanner(new File(file_name));
while ( scanner.hasNextLine() ) while (scanner.hasNextLine()) {
{
String line = scanner.nextLine(); String line = scanner.nextLine();
line.replaceAll("\n", ""); line.replaceAll("\n", "");
locStr += line; locStr += line;
if (scanner.hasNextLine()) { locStr += ";"; } if (scanner.hasNextLine()) {
locStr += ";";
} }
} }
catch (Exception e) }
{ catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
System.exit(-1); System.exit(-1);
} }
finally finally {
{
//ensure the underlying stream is always closed //ensure the underlying stream is always closed
scanner.close(); scanner.close();
} }
@ -207,8 +223,8 @@ public class TraversalEngine {
System.out.printf(" Locations are: %s%n", Utils.join("\n", Functions.map(Operators.toString, Arrays.asList(locs)))); System.out.printf(" Locations are: %s%n", Utils.join("\n", Functions.map(Operators.toString, Arrays.asList(locs))));
return locs; return locs;
} catch (Exception e) { } catch (Exception e) {
Utils.scareUser(String.format("Invalid locations string: %s, format is loc1;loc2; where each locN can be 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'", str)); logger.fatal(String.format("Invalid locations string: %s, format is loc1;loc2; where each locN can be 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'", str));
return null; throw new IllegalArgumentException("Invalid locations string: " + str + ", format is loc1;loc2; where each locN can be 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'");
} }
} }
@ -220,14 +236,10 @@ public class TraversalEngine {
* @return true if we should process GenomeLoc curr, otherwise false * @return true if we should process GenomeLoc curr, otherwise false
*/ */
public boolean inLocations(GenomeLoc curr) { public boolean inLocations(GenomeLoc curr) {
if ( this.locs == null ) if (this.locs == null) {
{
return true; return true;
} } else {
else for (GenomeLoc loc : this.locs) {
{
for ( GenomeLoc loc : this.locs )
{
//System.out.printf(" Overlap %s vs. %s => %b%n", loc, curr, loc.overlapsP(curr)); //System.out.printf(" Overlap %s vs. %s => %b%n", loc, curr, loc.overlapsP(curr));
if (loc.overlapsP(curr)) if (loc.overlapsP(curr))
return true; return true;
@ -256,7 +268,6 @@ public class TraversalEngine {
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
/** /**
*
* @param curTime (current runtime, in millisecs) * @param curTime (current runtime, in millisecs)
* @return true if the maximum interval (in millisecs) has passed since the last printing * @return true if the maximum interval (in millisecs) has passed since the last printing
*/ */
@ -379,8 +390,7 @@ public class TraversalEngine {
// If the file has an index, querying functions are available. Use them if possible... // If the file has an index, querying functions are available. Use them if possible...
if (samReader.hasIndex()) { if (samReader.hasIndex()) {
iterator = new SamQueryIterator(samReader, locs); iterator = new SamQueryIterator(samReader, locs);
} } else {
else {
// Ugh. Close and reopen the file so that the file progress decorator can be assigned to the input stream. // Ugh. Close and reopen the file so that the file progress decorator can be assigned to the input stream.
samReader.close(); samReader.close();
@ -399,7 +409,6 @@ public class TraversalEngine {
/** /**
* Prepare the reference for stream processing * Prepare the reference for stream processing
*
*/ */
protected void initializeReference() { protected void initializeReference() {
if (refFileName != null) { if (refFileName != null) {
@ -408,7 +417,8 @@ public class TraversalEngine {
this.refIter = new ReferenceIterator(this.refFile); this.refIter = new ReferenceIterator(this.refFile);
if (!Utils.setupRefContigOrdering(this.refFile)) { if (!Utils.setupRefContigOrdering(this.refFile)) {
// We couldn't process the reference contig ordering, fail since we need it // We couldn't process the reference contig ordering, fail since we need it
Utils.scareUser(String.format("We couldn't load the contig dictionary associated with %s. At the current time we require this dictionary file to efficiently access the FASTA file. In the near future this program will automatically construct the dictionary for you and save it down.", refFileName)); logger.fatal(String.format("We couldn't load the contig dictionary associated with %s. At the current time we require this dictionary file to efficiently access the FASTA file. In the near future this program will automatically construct the dictionary for you and save it down.", refFileName));
throw new RuntimeException("We couldn't load the contig dictionary associated with " + refFileName + ". At the current time we require this dictionary file to efficiently access the FASTA file. In the near future this program will automatically construct the dictionary for you and save it down.");
} }
} }
} }
@ -473,7 +483,6 @@ public class TraversalEngine {
* Class to filter out un-handle-able reads from the stream. We currently are skipping * Class to filter out un-handle-able reads from the stream. We currently are skipping
* unmapped reads, non-primary reads, unaligned reads, and those with indels. We should * unmapped reads, non-primary reads, unaligned reads, and those with indels. We should
* really change this to handle indel containing reads. * really change this to handle indel containing reads.
*
*/ */
class locusStreamFilterFunc implements SamRecordFilter { class locusStreamFilterFunc implements SamRecordFilter {
public boolean filterOut(SAMRecord rec) { public boolean filterOut(SAMRecord rec) {
@ -483,26 +492,22 @@ public class TraversalEngine {
nUnmappedReads++; nUnmappedReads++;
result = true; result = true;
why = "Unmapped"; why = "Unmapped";
} } else if (rec.getNotPrimaryAlignmentFlag()) {
else if ( rec.getNotPrimaryAlignmentFlag() ) {
nNotPrimary++; nNotPrimary++;
result = true; result = true;
why = "Not Primary"; why = "Not Primary";
} } else if (rec.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) {
else if ( rec.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START ) {
nBadAlignments++; nBadAlignments++;
result = true; result = true;
why = "No alignment start"; why = "No alignment start";
} } else {
else {
result = false; result = false;
} }
if (result) { if (result) {
nSkippedReads++; nSkippedReads++;
//System.out.printf(" [filter] %s => %b %s%n", rec.getReadName(), result, why); //System.out.printf(" [filter] %s => %b %s%n", rec.getReadName(), result, why);
} } else {
else {
nReads++; nReads++;
} }
return result; return result;
@ -515,7 +520,7 @@ public class TraversalEngine {
if (requiresSortedOrder || strictness == SAMFileReader.ValidationStringency.STRICT) if (requiresSortedOrder || strictness == SAMFileReader.ValidationStringency.STRICT)
throw new RuntimeIOException(msg); throw new RuntimeIOException(msg);
else if (strictness == SAMFileReader.ValidationStringency.LENIENT) else if (strictness == SAMFileReader.ValidationStringency.LENIENT)
Utils.warnUser(msg); logger.warn(msg);
} }
} }
@ -547,8 +552,7 @@ public class TraversalEngine {
int current_interval_index = -1; int current_interval_index = -1;
int current_interval_offset = -1; int current_interval_offset = -1;
while ( iter.hasNext() && ! done ) while (iter.hasNext() && !done) {
{
this.nRecords++; this.nRecords++;
// actually get the read and hand it to the walker // actually get the read and hand it to the walker
@ -556,20 +560,20 @@ public class TraversalEngine {
// Poor man's version of index LOL // Poor man's version of index LOL
// HALP! I HAZ 10K INTERVALS 2 INDX // HALP! I HAZ 10K INTERVALS 2 INDX
if ( ((this.locs != null) && (this.locs.length != 0)) && ((current_interval_index == -1) || (!locus.getLocation().overlapsP(this.locs[current_interval_index])))) if (((this.locs != null) && (this.locs.length != 0)) && ((current_interval_index == -1) || (!locus.getLocation().overlapsP(this.locs[current_interval_index])))) {
{
// Advance to the next locus. // Advance to the next locus.
current_interval_index += 1; current_interval_index += 1;
current_interval_offset = 0; current_interval_offset = 0;
if (this.locs.length <= current_interval_index) { done = true; break; } if (this.locs.length <= current_interval_index) {
done = true;
break;
}
//System.out.format("DEBUG Seeking from %s to %s\n", locus.getLocation().toString(), this.locs[current_interval_index].toString()); //System.out.format("DEBUG Seeking from %s to %s\n", locus.getLocation().toString(), this.locs[current_interval_index].toString());
while ((this.locs.length > current_interval_index) && (!locus.getLocation().overlapsP(this.locs[current_interval_index])) && (iter.hasNext())) while ((this.locs.length > current_interval_index) && (!locus.getLocation().overlapsP(this.locs[current_interval_index])) && (iter.hasNext())) {
{ switch (locus.getLocation().compareTo(this.locs[current_interval_index])) {
switch (locus.getLocation().compareTo(this.locs[current_interval_index]))
{
case -1: case -1:
locus = iter.next(); locus = iter.next();
//System.out.format("DEBUG at %s\n", locus.getLocation().toString()); //System.out.format("DEBUG at %s\n", locus.getLocation().toString());
@ -579,17 +583,21 @@ public class TraversalEngine {
case 1: case 1:
current_interval_index += 1; current_interval_index += 1;
current_interval_offset = 0; current_interval_offset = 0;
if (this.locs.length <= current_interval_index) { done = true; break; } if (this.locs.length <= current_interval_index) {
done = true;
break;
}
//System.out.format("DEBUG Giving up on old locus, Seeking from %s to %s\n", locus.getLocation().toString(), this.locs[current_interval_index].toString()); //System.out.format("DEBUG Giving up on old locus, Seeking from %s to %s\n", locus.getLocation().toString(), this.locs[current_interval_index].toString());
break; break;
} }
} }
if (this.locs.length <= current_interval_index) { done = true; break; } if (this.locs.length <= current_interval_index) {
done = true;
break;
}
//System.out.format("DEBUG Got there.\n"); //System.out.format("DEBUG Got there.\n");
} } else {
else
{
current_interval_offset += 1; current_interval_offset += 1;
} }
@ -640,9 +648,10 @@ public class TraversalEngine {
* the walker object, in coordinate order. Supports all of the * the walker object, in coordinate order. Supports all of the
* interaction contract implied by the read walker * interaction contract implied by the read walker
* sor * sor
*
* @param walker A read walker object * @param walker A read walker object
* @param <M> MapType -- the result of calling map() on walker * @param <M> MapType -- the result of calling map() on walker
* @param <T> ReduceType -- the result of calling reduce() on the walker * @param <R> ReduceType -- the result of calling reduce() on the walker
* @return 0 on success * @return 0 on success
*/ */
protected <M, R> int traverseByRead(ReadWalker<M, R> walker) { protected <M, R> int traverseByRead(ReadWalker<M, R> walker) {