some cleanup, also ensuring that all reads get written into output

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@450 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
asivache 2009-04-16 16:49:25 +00:00
parent e8a6cdb386
commit 4f9bc7206f
1 changed files with 38 additions and 10 deletions

View File

@ -1,5 +1,9 @@
package org.broadinstitute.sting.playground.gatk.walkers;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
@ -23,18 +27,22 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
private int discarded_cigar_count = 0;
private int discarded_long_read_count = 0;
private int discarded_maxerr_count = 0;
private int unmapped_read_count = 0;
private int total_reads = 0;
private IndelRecordPileCollector collector;
private PileBuilder pileBuilder = null;
private PassThroughWriter ptWriter;
private String cur_contig = null;
public boolean filter(LocusContext context, SAMRecord read) {
// we only want aligned reads
return !read.getReadUnmappedFlag();
}
// public boolean filter(LocusContext context, SAMRecord read) {
// // we only want aligned reads
// return !read.getReadUnmappedFlag();
// }
public void initialize() {
if ( ERR_MODE != null && ! ERR_MODE.equals("MM") && ! ERR_MODE.equals("MG") && ! ERR_MODE.equals("ERR") && ! ERR_MODE.equals("MC") ) {
err.println("Unknown value specified for ERR_MODE: "+ERR_MODE+ "... skipping it.");
ERR_MODE = null;
@ -68,13 +76,20 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
public Integer map(LocusContext context, SAMRecord read) {
total_reads++;
if ( read.getReadUnmappedFlag() ) {
unmapped_read_count++;
ptWriter.receive(read); // make sure we still write the read to the output, we do not want to lose data!
return 0;
}
ReferenceSequence contig_seq = context.getReferenceContig();
if ( read.getReferenceName() != cur_contig) {
cur_contig = read.getReferenceName();
out.println("Contig "+cur_contig);
String refstr = new String(contig_seq.getBases());
collector.setReferenceSequence(refstr);
if ( !CONTROL_RUN )
if ( !CONTROL_RUN )
pileBuilder.setReferenceSequence(refstr);
out.println("loaded contig "+cur_contig+" (index="+read.getReferenceIndex()+"); length="+contig_seq.getBases().length+" tst="+contig_seq.toString());
}
@ -82,6 +97,7 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
// if ( cur_contig.equals("chrM") || GenomeLoc.compareContigs(cur_contig,"chrY") > 0 ) continue; // skip chrM and unplaced contigs for now
if ( MAX_READ_LENGTH > 0 && read.getReadLength() > MAX_READ_LENGTH ) {
ptWriter.receive(read);
discarded_long_read_count++;
return 0;
}
@ -101,6 +117,7 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
}
}
if ( ! cigar_acceptable ) {
ptWriter.receive(read);
discarded_cigar_count++;
return 0;
}
@ -124,8 +141,11 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
else if ( ERR_MODE.equals("MC") ) err = AlignmentUtils.numMismatches(read,contig_seq);
else if ( ERR_MODE.equals("ERR")) err = numErrors(read,contig_seq);
else if ( ERR_MODE.equals("MG")) err = numMismatchesGaps(read,contig_seq);
if ( err > MAX_ERRS )
if ( err > MAX_ERRS ) {
ptWriter.receive(read);
discarded_maxerr_count++;
return 0;
}
}
collector.receive(read);
@ -204,15 +224,23 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
}
public void onTraversalDone(Integer result) {
collector.close(); // write the reads collector might be still holding
if ( ! CONTROL_RUN ) {
pileBuilder.printStats();
pileBuilder.close();
}
out.println("done.");
out.println("Total reads processed: "+ total_reads);
out.println("Unmodified: "+ ptWriter.getNumReadsReceived());
if ( pileBuilder != null) out.println("Written into realigned piles: "+ pileBuilder.getNumReadsWritten());
if ( pileBuilder != null) out.println("Received for realignment: "+ pileBuilder.getNumReadsReceived());
out.println("Discarded reads with non-M,I,D cigar elements: "+ discarded_cigar_count);
out.println("Discarded long reads (above "+MAX_READ_LENGTH+" bp): "+ discarded_long_read_count);
out.println("Discarded reads with error counts exceeding the threshold: "+ discarded_maxerr_count);
out.println("Unmapped reads (passed through to the output): "+ unmapped_read_count);
out.println();
collector.printLengthHistograms();
ptWriter.close();
ptWriter.close();
}
}