some cleanup, also ensuring that all reads get written into output
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@450 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e8a6cdb386
commit
4f9bc7206f
|
|
@ -1,5 +1,9 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers;
|
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileWriter;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||||
|
|
@ -23,15 +27,19 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
|
||||||
|
|
||||||
private int discarded_cigar_count = 0;
|
private int discarded_cigar_count = 0;
|
||||||
private int discarded_long_read_count = 0;
|
private int discarded_long_read_count = 0;
|
||||||
|
private int discarded_maxerr_count = 0;
|
||||||
|
private int unmapped_read_count = 0;
|
||||||
|
private int total_reads = 0;
|
||||||
private IndelRecordPileCollector collector;
|
private IndelRecordPileCollector collector;
|
||||||
private PileBuilder pileBuilder = null;
|
private PileBuilder pileBuilder = null;
|
||||||
private PassThroughWriter ptWriter;
|
private PassThroughWriter ptWriter;
|
||||||
private String cur_contig = null;
|
private String cur_contig = null;
|
||||||
|
|
||||||
public boolean filter(LocusContext context, SAMRecord read) {
|
|
||||||
// we only want aligned reads
|
// public boolean filter(LocusContext context, SAMRecord read) {
|
||||||
return !read.getReadUnmappedFlag();
|
// // we only want aligned reads
|
||||||
}
|
// return !read.getReadUnmappedFlag();
|
||||||
|
// }
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
|
|
||||||
|
|
@ -68,13 +76,20 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
|
||||||
|
|
||||||
public Integer map(LocusContext context, SAMRecord read) {
|
public Integer map(LocusContext context, SAMRecord read) {
|
||||||
|
|
||||||
|
total_reads++;
|
||||||
|
|
||||||
|
if ( read.getReadUnmappedFlag() ) {
|
||||||
|
unmapped_read_count++;
|
||||||
|
ptWriter.receive(read); // make sure we still write the read to the output, we do not want to lose data!
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
ReferenceSequence contig_seq = context.getReferenceContig();
|
ReferenceSequence contig_seq = context.getReferenceContig();
|
||||||
if ( read.getReferenceName() != cur_contig) {
|
if ( read.getReferenceName() != cur_contig) {
|
||||||
cur_contig = read.getReferenceName();
|
cur_contig = read.getReferenceName();
|
||||||
out.println("Contig "+cur_contig);
|
out.println("Contig "+cur_contig);
|
||||||
String refstr = new String(contig_seq.getBases());
|
String refstr = new String(contig_seq.getBases());
|
||||||
collector.setReferenceSequence(refstr);
|
if ( !CONTROL_RUN )
|
||||||
if ( !CONTROL_RUN )
|
|
||||||
pileBuilder.setReferenceSequence(refstr);
|
pileBuilder.setReferenceSequence(refstr);
|
||||||
out.println("loaded contig "+cur_contig+" (index="+read.getReferenceIndex()+"); length="+contig_seq.getBases().length+" tst="+contig_seq.toString());
|
out.println("loaded contig "+cur_contig+" (index="+read.getReferenceIndex()+"); length="+contig_seq.getBases().length+" tst="+contig_seq.toString());
|
||||||
}
|
}
|
||||||
|
|
@ -82,6 +97,7 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
|
||||||
// if ( cur_contig.equals("chrM") || GenomeLoc.compareContigs(cur_contig,"chrY") > 0 ) continue; // skip chrM and unplaced contigs for now
|
// if ( cur_contig.equals("chrM") || GenomeLoc.compareContigs(cur_contig,"chrY") > 0 ) continue; // skip chrM and unplaced contigs for now
|
||||||
|
|
||||||
if ( MAX_READ_LENGTH > 0 && read.getReadLength() > MAX_READ_LENGTH ) {
|
if ( MAX_READ_LENGTH > 0 && read.getReadLength() > MAX_READ_LENGTH ) {
|
||||||
|
ptWriter.receive(read);
|
||||||
discarded_long_read_count++;
|
discarded_long_read_count++;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -101,6 +117,7 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ( ! cigar_acceptable ) {
|
if ( ! cigar_acceptable ) {
|
||||||
|
ptWriter.receive(read);
|
||||||
discarded_cigar_count++;
|
discarded_cigar_count++;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -124,8 +141,11 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
|
||||||
else if ( ERR_MODE.equals("MC") ) err = AlignmentUtils.numMismatches(read,contig_seq);
|
else if ( ERR_MODE.equals("MC") ) err = AlignmentUtils.numMismatches(read,contig_seq);
|
||||||
else if ( ERR_MODE.equals("ERR")) err = numErrors(read,contig_seq);
|
else if ( ERR_MODE.equals("ERR")) err = numErrors(read,contig_seq);
|
||||||
else if ( ERR_MODE.equals("MG")) err = numMismatchesGaps(read,contig_seq);
|
else if ( ERR_MODE.equals("MG")) err = numMismatchesGaps(read,contig_seq);
|
||||||
if ( err > MAX_ERRS )
|
if ( err > MAX_ERRS ) {
|
||||||
|
ptWriter.receive(read);
|
||||||
|
discarded_maxerr_count++;
|
||||||
return 0;
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
collector.receive(read);
|
collector.receive(read);
|
||||||
|
|
@ -204,13 +224,21 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void onTraversalDone(Integer result) {
|
public void onTraversalDone(Integer result) {
|
||||||
|
collector.close(); // write the reads collector might be still holding
|
||||||
|
|
||||||
if ( ! CONTROL_RUN ) {
|
if ( ! CONTROL_RUN ) {
|
||||||
pileBuilder.printStats();
|
pileBuilder.printStats();
|
||||||
pileBuilder.close();
|
pileBuilder.close();
|
||||||
}
|
}
|
||||||
out.println("done.");
|
out.println("done.");
|
||||||
|
out.println("Total reads processed: "+ total_reads);
|
||||||
|
out.println("Unmodified: "+ ptWriter.getNumReadsReceived());
|
||||||
|
if ( pileBuilder != null) out.println("Written into realigned piles: "+ pileBuilder.getNumReadsWritten());
|
||||||
|
if ( pileBuilder != null) out.println("Received for realignment: "+ pileBuilder.getNumReadsReceived());
|
||||||
out.println("Discarded reads with non-M,I,D cigar elements: "+ discarded_cigar_count);
|
out.println("Discarded reads with non-M,I,D cigar elements: "+ discarded_cigar_count);
|
||||||
out.println("Discarded long reads (above "+MAX_READ_LENGTH+" bp): "+ discarded_long_read_count);
|
out.println("Discarded long reads (above "+MAX_READ_LENGTH+" bp): "+ discarded_long_read_count);
|
||||||
|
out.println("Discarded reads with error counts exceeding the threshold: "+ discarded_maxerr_count);
|
||||||
|
out.println("Unmapped reads (passed through to the output): "+ unmapped_read_count);
|
||||||
out.println();
|
out.println();
|
||||||
collector.printLengthHistograms();
|
collector.printLengthHistograms();
|
||||||
ptWriter.close();
|
ptWriter.close();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue