diff --git a/java/src/org/broadinstitute/sting/playground/indels/IndelRecordPileCollector.java b/java/src/org/broadinstitute/sting/playground/indels/IndelRecordPileCollector.java index 56be41780..bad34be3a 100755 --- a/java/src/org/broadinstitute/sting/playground/indels/IndelRecordPileCollector.java +++ b/java/src/org/broadinstitute/sting/playground/indels/IndelRecordPileCollector.java @@ -91,8 +91,6 @@ public class IndelRecordPileCollector implements RecordReceiver { private boolean controlRun = false; - private String referenceSequence; - public String memStatsString() { String s = "mRecordPile: "; return s+mRecordPile.size() + " mAllIndels: "+mAllIndels.size() + " mLastContig=" +mLastContig + " mLastStartOnref="+mLastStartOnRef; @@ -115,7 +113,6 @@ public class IndelRecordPileCollector implements RecordReceiver { } defaultReceiver = rr; indelPileReceiver = rp; - referenceSequence = null; setWaitState(); } @@ -123,6 +120,7 @@ public class IndelRecordPileCollector implements RecordReceiver { * Does not emit records, just clears/resets the variables. */ private void setWaitState() { + mRecordPile.clear(); mAllIndels.clear(); // mIndelRegionStart = 1000000000; @@ -133,9 +131,6 @@ public class IndelRecordPileCollector implements RecordReceiver { public void setControlRun(boolean c) { controlRun = c; } - public void setReferenceSequence(String contig) { - referenceSequence = contig; - } /** A utility method: emits into nonindelReceiver and purges from the currently held SAM record pile * all the consequtive records with alignment end positions less than or equal to the specified @@ -170,6 +165,13 @@ public class IndelRecordPileCollector implements RecordReceiver { } else break; } } + + /** This method MUST be called when no more reads are left in order to enforce the collector to emit the current pile of reads + * it is still holding. 
+ */ + public void close() { + emit(); + } /** This is the main interface method of the collector: it receives alignments, inspects them, detects indels, * updates and purges the read pile it keeps and emits alignments as needed. @@ -198,7 +200,10 @@ public class IndelRecordPileCollector implements RecordReceiver { */ public void receive(final SAMRecord r) throws RuntimeException { - if ( r.getReadUnmappedFlag() ) return; // read did not align, nothing to do + if ( r.getReadUnmappedFlag() ) { + defaultReceiver.receive(r); // do not throw reads away even if they are of no use for us, keep them in the output bam.... + return; // read did not align, nothing to do + } if ( controlRun ) { defaultReceiver.receive(r); @@ -230,9 +235,13 @@ public class IndelRecordPileCollector implements RecordReceiver { // does nothing if alignment has no indels, otherwise adds the indels to the list and (re)sets state to 'active' extractIndelsAndUpdateState(r.getCigar(),currPos); - if ( mState == ACTIVE_STATE && ( ! avoiding_region ) && ( mAllIndels.size() > 20 || mRecordPile.size() > 1000 ) ) avoiding_region = true; + if ( mState == ACTIVE_STATE && ( ! avoiding_region ) && ( mAllIndels.size() > 20 || mRecordPile.size() > 1000 ) ) { + avoiding_region = true; + } if ( ! avoiding_region ) mRecordPile.add(r); // add new record if this is not some crazy region + else defaultReceiver.receive(r); // if we do not want to or can not deal with a region, pass reads through; + // the pile we have already collected before discovering it's a bad region will be sent through on the next call to emit() mLastContig = currContig; mLastStartOnRef = currPos; @@ -275,6 +284,7 @@ public class IndelRecordPileCollector implements RecordReceiver { // can be more than one pile in what we have stored. Also, we can still have gapless reads // at the ends of the piles that do not really overlap with indel sites. 
+ if ( mAllIndels.size() == 0 ) throw new RuntimeException("Attempt to emit pile with no indels"); HistogramAsNeeded(mAllIndels); @@ -333,7 +343,7 @@ public class IndelRecordPileCollector implements RecordReceiver { // and can be emitted if ( shouldAcceptForOutput(finalTrain ) ) { - System.out.print(mLastContig+":"+ finalTrain.get(0).getObject().getStart() + "-" + + System.out.print("SITE: " + mLastContig+":"+ finalTrain.get(0).getObject().getStart() + "-" + finalTrain.get(finalTrain.size()-1).getObject().getStop() + " " + finalTrain.size() + " indels; "); System.out.print(finalPile.size() + " reads in the pile;") ; @@ -351,6 +361,11 @@ public class IndelRecordPileCollector implements RecordReceiver { // with building the indel train } + // we may still have reads in the original pile that start after the last indel: + for ( SAMRecord r : mRecordPile ) { + defaultReceiver.receive(r); + } + setWaitState(); } diff --git a/java/src/org/broadinstitute/sting/playground/indels/PassThroughWriter.java b/java/src/org/broadinstitute/sting/playground/indels/PassThroughWriter.java index 26b651bce..98cdf403e 100644 --- a/java/src/org/broadinstitute/sting/playground/indels/PassThroughWriter.java +++ b/java/src/org/broadinstitute/sting/playground/indels/PassThroughWriter.java @@ -16,6 +16,7 @@ import java.io.File; */ public class PassThroughWriter implements RecordReceiver { private SAMFileWriter writer; + private int reads_written = 0; public PassThroughWriter( File f, SAMFileHeader h) { writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(h, false, f); @@ -28,7 +29,13 @@ public class PassThroughWriter implements RecordReceiver { public void receive(SAMRecord r) { //To change body of implemented methods use File | Settings | File Templates. writer.addAlignment(r); + reads_written++; } public void close() { writer.close() ; } + + /** Returns the number of reads that were so far received by this writer. 
+ * + */ + public int getNumReadsReceived() { return reads_written; } } diff --git a/java/src/org/broadinstitute/sting/playground/indels/PileBuilder.java b/java/src/org/broadinstitute/sting/playground/indels/PileBuilder.java index 73b9ba30c..0044dab98 100755 --- a/java/src/org/broadinstitute/sting/playground/indels/PileBuilder.java +++ b/java/src/org/broadinstitute/sting/playground/indels/PileBuilder.java @@ -34,6 +34,9 @@ public class PileBuilder implements RecordPileReceiver { private int total_reads_in_improved = 0; private int total_reads_in_failed = 0; private int total_alignments_modified = 0; + + private int total_reads_received = 0; + private int total_reads_written = 0; public final static int SILENT = 0; public final static int PILESUMMARY = 1; @@ -115,10 +118,22 @@ public class PileBuilder implements RecordPileReceiver { reference_start = -1; } + /** Returns the number of reads received so far by this pile builder (summed over all piles passed to receive()). + * + */ + public int getNumReadsReceived() { return total_reads_received; } + + /** Returns the number of reads written so far by this pile builder to its SAM writer (NOT those sent + * into its secondary "failed mode" receiver!) + * + */ + public int getNumReadsWritten() { return total_reads_written; } + public void receive(Collection c) { //TODO: if read starts/ends with an indel (insertion, actually), we detect this as a "different" indel introduced during cleanup. 
processed_piles++; + total_reads_received += c.size(); IndexedSequence[] seqs = new IndexedSequence[c.size()]; int i = 0; @@ -357,7 +372,7 @@ public class PileBuilder implements RecordPileReceiver { // System.out.println("writing " + id); samWriter.addAlignment(r); - + total_reads_written++; } } @@ -619,12 +634,9 @@ public class PileBuilder implements RecordPileReceiver { } public double averageDistanceForOffset(MultipleAlignment a1, MultipleAlignment a2, int offset) { - SelectedPair p = new SelectedPair(); - + double d_av = 0; int nseq = 0; - int i1 = -1; - int i2 = -1; for ( Integer id2 : a2 ) { SelectedPair spo = averageDistanceForOffset(a1,id2,offset+a2.getOffsetById(id2));