some cleanup, also ensuring that all reads get written into output

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@450 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
asivache 2009-04-16 16:49:25 +00:00
parent e8a6cdb386
commit 4f9bc7206f
1 changed files with 38 additions and 10 deletions

View File

@ -1,5 +1,9 @@
package org.broadinstitute.sting.playground.gatk.walkers; package org.broadinstitute.sting.playground.gatk.walkers;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName; import org.broadinstitute.sting.gatk.walkers.WalkerName;
@ -23,15 +27,19 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
private int discarded_cigar_count = 0; private int discarded_cigar_count = 0;
private int discarded_long_read_count = 0; private int discarded_long_read_count = 0;
private int discarded_maxerr_count = 0;
private int unmapped_read_count = 0;
private int total_reads = 0;
private IndelRecordPileCollector collector; private IndelRecordPileCollector collector;
private PileBuilder pileBuilder = null; private PileBuilder pileBuilder = null;
private PassThroughWriter ptWriter; private PassThroughWriter ptWriter;
private String cur_contig = null; private String cur_contig = null;
public boolean filter(LocusContext context, SAMRecord read) {
// we only want aligned reads // public boolean filter(LocusContext context, SAMRecord read) {
return !read.getReadUnmappedFlag(); // // we only want aligned reads
} // return !read.getReadUnmappedFlag();
// }
public void initialize() { public void initialize() {
@ -68,12 +76,19 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
public Integer map(LocusContext context, SAMRecord read) { public Integer map(LocusContext context, SAMRecord read) {
total_reads++;
if ( read.getReadUnmappedFlag() ) {
unmapped_read_count++;
ptWriter.receive(read); // make sure we still write the read to the output, we do not want to lose data!
return 0;
}
ReferenceSequence contig_seq = context.getReferenceContig(); ReferenceSequence contig_seq = context.getReferenceContig();
if ( read.getReferenceName() != cur_contig) { if ( read.getReferenceName() != cur_contig) {
cur_contig = read.getReferenceName(); cur_contig = read.getReferenceName();
out.println("Contig "+cur_contig); out.println("Contig "+cur_contig);
String refstr = new String(contig_seq.getBases()); String refstr = new String(contig_seq.getBases());
collector.setReferenceSequence(refstr);
if ( !CONTROL_RUN ) if ( !CONTROL_RUN )
pileBuilder.setReferenceSequence(refstr); pileBuilder.setReferenceSequence(refstr);
out.println("loaded contig "+cur_contig+" (index="+read.getReferenceIndex()+"); length="+contig_seq.getBases().length+" tst="+contig_seq.toString()); out.println("loaded contig "+cur_contig+" (index="+read.getReferenceIndex()+"); length="+contig_seq.getBases().length+" tst="+contig_seq.toString());
@ -82,6 +97,7 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
// if ( cur_contig.equals("chrM") || GenomeLoc.compareContigs(cur_contig,"chrY") > 0 ) continue; // skip chrM and unplaced contigs for now // if ( cur_contig.equals("chrM") || GenomeLoc.compareContigs(cur_contig,"chrY") > 0 ) continue; // skip chrM and unplaced contigs for now
if ( MAX_READ_LENGTH > 0 && read.getReadLength() > MAX_READ_LENGTH ) { if ( MAX_READ_LENGTH > 0 && read.getReadLength() > MAX_READ_LENGTH ) {
ptWriter.receive(read);
discarded_long_read_count++; discarded_long_read_count++;
return 0; return 0;
} }
@ -101,6 +117,7 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
} }
} }
if ( ! cigar_acceptable ) { if ( ! cigar_acceptable ) {
ptWriter.receive(read);
discarded_cigar_count++; discarded_cigar_count++;
return 0; return 0;
} }
@ -124,9 +141,12 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
else if ( ERR_MODE.equals("MC") ) err = AlignmentUtils.numMismatches(read,contig_seq); else if ( ERR_MODE.equals("MC") ) err = AlignmentUtils.numMismatches(read,contig_seq);
else if ( ERR_MODE.equals("ERR")) err = numErrors(read,contig_seq); else if ( ERR_MODE.equals("ERR")) err = numErrors(read,contig_seq);
else if ( ERR_MODE.equals("MG")) err = numMismatchesGaps(read,contig_seq); else if ( ERR_MODE.equals("MG")) err = numMismatchesGaps(read,contig_seq);
if ( err > MAX_ERRS ) if ( err > MAX_ERRS ) {
ptWriter.receive(read);
discarded_maxerr_count++;
return 0; return 0;
} }
}
collector.receive(read); collector.receive(read);
return 1; return 1;
@ -204,13 +224,21 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
} }
public void onTraversalDone(Integer result) { public void onTraversalDone(Integer result) {
collector.close(); // write the reads collector might be still holding
if ( ! CONTROL_RUN ) { if ( ! CONTROL_RUN ) {
pileBuilder.printStats(); pileBuilder.printStats();
pileBuilder.close(); pileBuilder.close();
} }
out.println("done."); out.println("done.");
out.println("Total reads processed: "+ total_reads);
out.println("Unmodified: "+ ptWriter.getNumReadsReceived());
if ( pileBuilder != null) out.println("Written into realigned piles: "+ pileBuilder.getNumReadsWritten());
if ( pileBuilder != null) out.println("Received for realignment: "+ pileBuilder.getNumReadsReceived());
out.println("Discarded reads with non-M,I,D cigar elements: "+ discarded_cigar_count); out.println("Discarded reads with non-M,I,D cigar elements: "+ discarded_cigar_count);
out.println("Discarded long reads (above "+MAX_READ_LENGTH+" bp): "+ discarded_long_read_count); out.println("Discarded long reads (above "+MAX_READ_LENGTH+" bp): "+ discarded_long_read_count);
out.println("Discarded reads with error counts exceeding the threshold: "+ discarded_maxerr_count);
out.println("Unmapped reads (passed through to the output): "+ unmapped_read_count);
out.println(); out.println();
collector.printLengthHistograms(); collector.printLengthHistograms();
ptWriter.close(); ptWriter.close();