fix a few related issues when not all the reads were written into the output files. now cleaned output still contains all reads either with modified alignments or untouched

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@444 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
asivache 2009-04-16 03:56:47 +00:00
parent 0d324354ae
commit 240eb18564
3 changed files with 48 additions and 14 deletions

View File

@ -91,8 +91,6 @@ public class IndelRecordPileCollector implements RecordReceiver {
private boolean controlRun = false; private boolean controlRun = false;
private String referenceSequence;
public String memStatsString() { public String memStatsString() {
String s = "mRecordPile: "; String s = "mRecordPile: ";
return s+mRecordPile.size() + " mAllIndels: "+mAllIndels.size() + " mLastContig=" +mLastContig + " mLastStartOnref="+mLastStartOnRef; return s+mRecordPile.size() + " mAllIndels: "+mAllIndels.size() + " mLastContig=" +mLastContig + " mLastStartOnref="+mLastStartOnRef;
@ -115,7 +113,6 @@ public class IndelRecordPileCollector implements RecordReceiver {
} }
defaultReceiver = rr; defaultReceiver = rr;
indelPileReceiver = rp; indelPileReceiver = rp;
referenceSequence = null;
setWaitState(); setWaitState();
} }
@ -123,6 +120,7 @@ public class IndelRecordPileCollector implements RecordReceiver {
* Does not emit records, just clears/resets the variables. * Does not emit records, just clears/resets the variables.
*/ */
private void setWaitState() { private void setWaitState() {
mRecordPile.clear(); mRecordPile.clear();
mAllIndels.clear(); mAllIndels.clear();
// mIndelRegionStart = 1000000000; // mIndelRegionStart = 1000000000;
@ -133,9 +131,6 @@ public class IndelRecordPileCollector implements RecordReceiver {
public void setControlRun(boolean c) { controlRun = c; } public void setControlRun(boolean c) { controlRun = c; }
public void setReferenceSequence(String contig) {
referenceSequence = contig;
}
/** A utility method: emits into nonindelReceiver and purges from the currently held SAM record pile /** A utility method: emits into nonindelReceiver and purges from the currently held SAM record pile
* all the consequtive records with alignment end positions less than or equal to the specified * all the consequtive records with alignment end positions less than or equal to the specified
@ -170,6 +165,13 @@ public class IndelRecordPileCollector implements RecordReceiver {
} else break; } else break;
} }
} }
/** This method MUST be called when no more reads are left in order to enforce the collector to emit the current pile of reads
* it is still holding.
*/
public void close() {
emit();
}
/** This is the main interface method of the collector: it receives alignments, inspects them, detects indels, /** This is the main interface method of the collector: it receives alignments, inspects them, detects indels,
* updates and purges the read pile it keeps and emits alignments as needed. * updates and purges the read pile it keeps and emits alignments as needed.
@ -198,7 +200,10 @@ public class IndelRecordPileCollector implements RecordReceiver {
*/ */
public void receive(final SAMRecord r) throws RuntimeException { public void receive(final SAMRecord r) throws RuntimeException {
if ( r.getReadUnmappedFlag() ) return; // read did not align, nothing to do if ( r.getReadUnmappedFlag() ) {
defaultReceiver.receive(r); // do not throw reads away even if they are of no use for us, keep them in the output bam....
return; // read did not align, nothing to do
}
if ( controlRun ) { if ( controlRun ) {
defaultReceiver.receive(r); defaultReceiver.receive(r);
@ -230,9 +235,13 @@ public class IndelRecordPileCollector implements RecordReceiver {
// does nothing if alignment has no indels, otherwise adds the indels to the list and (re)sets state to 'active' // does nothing if alignment has no indels, otherwise adds the indels to the list and (re)sets state to 'active'
extractIndelsAndUpdateState(r.getCigar(),currPos); extractIndelsAndUpdateState(r.getCigar(),currPos);
if ( mState == ACTIVE_STATE && ( ! avoiding_region ) && ( mAllIndels.size() > 20 || mRecordPile.size() > 1000 ) ) avoiding_region = true; if ( mState == ACTIVE_STATE && ( ! avoiding_region ) && ( mAllIndels.size() > 20 || mRecordPile.size() > 1000 ) ) {
avoiding_region = true;
}
if ( ! avoiding_region ) mRecordPile.add(r); // add new record if this is not some crazy region if ( ! avoiding_region ) mRecordPile.add(r); // add new record if this is not some crazy region
else defaultReceiver.receive(r); // if we do not want to or can not deal with a region, pass reads through;
// the pile we have already collected before discovering it's a bad region will be sent through on the next call to emit()
mLastContig = currContig; mLastContig = currContig;
mLastStartOnRef = currPos; mLastStartOnRef = currPos;
@ -275,6 +284,7 @@ public class IndelRecordPileCollector implements RecordReceiver {
// can be more than one pile in what we have stored. Also, we can still have gapless reads // can be more than one pile in what we have stored. Also, we can still have gapless reads
// at the ends of the piles that do not really overlap with indel sites. // at the ends of the piles that do not really overlap with indel sites.
if ( mAllIndels.size() == 0 ) throw new RuntimeException("Attempt to emit pile with no indels"); if ( mAllIndels.size() == 0 ) throw new RuntimeException("Attempt to emit pile with no indels");
HistogramAsNeeded(mAllIndels); HistogramAsNeeded(mAllIndels);
@ -333,7 +343,7 @@ public class IndelRecordPileCollector implements RecordReceiver {
// and can be emitted // and can be emitted
if ( shouldAcceptForOutput(finalTrain ) ) { if ( shouldAcceptForOutput(finalTrain ) ) {
System.out.print(mLastContig+":"+ finalTrain.get(0).getObject().getStart() + "-" + System.out.print("SITE: " + mLastContig+":"+ finalTrain.get(0).getObject().getStart() + "-" +
finalTrain.get(finalTrain.size()-1).getObject().getStop() + " " + finalTrain.get(finalTrain.size()-1).getObject().getStop() + " " +
finalTrain.size() + " indels; "); finalTrain.size() + " indels; ");
System.out.print(finalPile.size() + " reads in the pile;") ; System.out.print(finalPile.size() + " reads in the pile;") ;
@ -351,6 +361,11 @@ public class IndelRecordPileCollector implements RecordReceiver {
// with building the indel train // with building the indel train
} }
// we may still have reads in the original pile that start after the last indel:
for ( SAMRecord r : mRecordPile ) {
defaultReceiver.receive(r);
}
setWaitState(); setWaitState();
} }

View File

@ -16,6 +16,7 @@ import java.io.File;
*/ */
public class PassThroughWriter implements RecordReceiver { public class PassThroughWriter implements RecordReceiver {
private SAMFileWriter writer; private SAMFileWriter writer;
private int reads_written = 0;
public PassThroughWriter( File f, SAMFileHeader h) { public PassThroughWriter( File f, SAMFileHeader h) {
writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(h, false, f); writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(h, false, f);
@ -28,7 +29,13 @@ public class PassThroughWriter implements RecordReceiver {
public void receive(SAMRecord r) { public void receive(SAMRecord r) {
//To change body of implemented methods use File | Settings | File Templates. //To change body of implemented methods use File | Settings | File Templates.
writer.addAlignment(r); writer.addAlignment(r);
reads_written++;
} }
public void close() { writer.close() ; } public void close() { writer.close() ; }
/** Returns the number of reads that were so far received by this writer.
*
*/
public int getNumReadsReceived() { return reads_written; }
} }

View File

@ -34,6 +34,9 @@ public class PileBuilder implements RecordPileReceiver {
private int total_reads_in_improved = 0; private int total_reads_in_improved = 0;
private int total_reads_in_failed = 0; private int total_reads_in_failed = 0;
private int total_alignments_modified = 0; private int total_alignments_modified = 0;
private int total_reads_received = 0;
private int total_reads_written = 0;
public final static int SILENT = 0; public final static int SILENT = 0;
public final static int PILESUMMARY = 1; public final static int PILESUMMARY = 1;
@ -115,10 +118,22 @@ public class PileBuilder implements RecordPileReceiver {
reference_start = -1; reference_start = -1;
} }
/** Returns the number of reads that were so far received by this writer.
*
*/
public int getNumReadsReceived() { return total_reads_received; }
/** Returns the number of reads that were so far written by this writer (NOT sent
* into its secondary "failed mode" receiver!)
*
*/
public int getNumReadsWritten() { return total_reads_written; }
public void receive(Collection<SAMRecord> c) { public void receive(Collection<SAMRecord> c) {
//TODO: if read starts/ends with an indel (insertion, actually), we detect this as a "different" indel introduced during cleanup. //TODO: if read starts/ends with an indel (insertion, actually), we detect this as a "different" indel introduced during cleanup.
processed_piles++; processed_piles++;
total_reads_received += c.size();
IndexedSequence[] seqs = new IndexedSequence[c.size()]; IndexedSequence[] seqs = new IndexedSequence[c.size()];
int i = 0; int i = 0;
@ -357,7 +372,7 @@ public class PileBuilder implements RecordPileReceiver {
// System.out.println("writing " + id); // System.out.println("writing " + id);
samWriter.addAlignment(r); samWriter.addAlignment(r);
total_reads_written++;
} }
} }
@ -619,12 +634,9 @@ public class PileBuilder implements RecordPileReceiver {
} }
public double averageDistanceForOffset(MultipleAlignment a1, MultipleAlignment a2, int offset) { public double averageDistanceForOffset(MultipleAlignment a1, MultipleAlignment a2, int offset) {
SelectedPair p = new SelectedPair();
double d_av = 0; double d_av = 0;
int nseq = 0; int nseq = 0;
int i1 = -1;
int i2 = -1;
for ( Integer id2 : a2 ) { for ( Integer id2 : a2 ) {
SelectedPair spo = averageDistanceForOffset(a1,id2,offset+a2.getOffsetById(id2)); SelectedPair spo = averageDistanceForOffset(a1,id2,offset+a2.getOffsetById(id2));