fix a few related issues when not all the reads were written into the output files. now cleaned output still contains all reads either with modified alignments or untouched

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@444 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
asivache 2009-04-16 03:56:47 +00:00
parent 0d324354ae
commit 240eb18564
3 changed files with 48 additions and 14 deletions

View File

@ -91,8 +91,6 @@ public class IndelRecordPileCollector implements RecordReceiver {
private boolean controlRun = false;
private String referenceSequence;
public String memStatsString() {
String s = "mRecordPile: ";
return s+mRecordPile.size() + " mAllIndels: "+mAllIndels.size() + " mLastContig=" +mLastContig + " mLastStartOnref="+mLastStartOnRef;
@ -115,7 +113,6 @@ public class IndelRecordPileCollector implements RecordReceiver {
}
defaultReceiver = rr;
indelPileReceiver = rp;
referenceSequence = null;
setWaitState();
}
@ -123,6 +120,7 @@ public class IndelRecordPileCollector implements RecordReceiver {
* Does not emit records, just clears/resets the variables.
*/
private void setWaitState() {
mRecordPile.clear();
mAllIndels.clear();
// mIndelRegionStart = 1000000000;
@ -133,9 +131,6 @@ public class IndelRecordPileCollector implements RecordReceiver {
public void setControlRun(boolean c) { controlRun = c; }
public void setReferenceSequence(String contig) {
referenceSequence = contig;
}
/** A utility method: emits into nonindelReceiver and purges from the currently held SAM record pile
* all the consequtive records with alignment end positions less than or equal to the specified
@ -170,6 +165,13 @@ public class IndelRecordPileCollector implements RecordReceiver {
} else break;
}
}
/** This method MUST be called when no more reads are left in order to enforce the collector to emit the current pile of reads
* it is still holding.
*/
public void close() {
emit();
}
/** This is the main interface method of the collector: it receives alignments, inspects them, detects indels,
* updates and purges the read pile it keeps and emits alignments as needed.
@ -198,7 +200,10 @@ public class IndelRecordPileCollector implements RecordReceiver {
*/
public void receive(final SAMRecord r) throws RuntimeException {
if ( r.getReadUnmappedFlag() ) return; // read did not align, nothing to do
if ( r.getReadUnmappedFlag() ) {
defaultReceiver.receive(r); // do not throw reads away even if they are of no use for us, keep them in the output bam....
return; // read did not align, nothing to do
}
if ( controlRun ) {
defaultReceiver.receive(r);
@ -230,9 +235,13 @@ public class IndelRecordPileCollector implements RecordReceiver {
// does nothing if alignment has no indels, otherwise adds the indels to the list and (re)sets state to 'active'
extractIndelsAndUpdateState(r.getCigar(),currPos);
if ( mState == ACTIVE_STATE && ( ! avoiding_region ) && ( mAllIndels.size() > 20 || mRecordPile.size() > 1000 ) ) avoiding_region = true;
if ( mState == ACTIVE_STATE && ( ! avoiding_region ) && ( mAllIndels.size() > 20 || mRecordPile.size() > 1000 ) ) {
avoiding_region = true;
}
if ( ! avoiding_region ) mRecordPile.add(r); // add new record if this is not some crazy region
else defaultReceiver.receive(r); // if we do not want to or can not deal with a region, pass reads through;
// the pile we have already collected before discovering it's a bad region will be sent through on the next call to emit()
mLastContig = currContig;
mLastStartOnRef = currPos;
@ -275,6 +284,7 @@ public class IndelRecordPileCollector implements RecordReceiver {
// can be more than one pile in what we have stored. Also, we can still have gapless reads
// at the ends of the piles that do not really overlap with indel sites.
if ( mAllIndels.size() == 0 ) throw new RuntimeException("Attempt to emit pile with no indels");
HistogramAsNeeded(mAllIndels);
@ -333,7 +343,7 @@ public class IndelRecordPileCollector implements RecordReceiver {
// and can be emitted
if ( shouldAcceptForOutput(finalTrain ) ) {
System.out.print(mLastContig+":"+ finalTrain.get(0).getObject().getStart() + "-" +
System.out.print("SITE: " + mLastContig+":"+ finalTrain.get(0).getObject().getStart() + "-" +
finalTrain.get(finalTrain.size()-1).getObject().getStop() + " " +
finalTrain.size() + " indels; ");
System.out.print(finalPile.size() + " reads in the pile;") ;
@ -351,6 +361,11 @@ public class IndelRecordPileCollector implements RecordReceiver {
// with building the indel train
}
// we may still have reads in the original pile that start after the last indel:
for ( SAMRecord r : mRecordPile ) {
defaultReceiver.receive(r);
}
setWaitState();
}

View File

@ -16,6 +16,7 @@ import java.io.File;
*/
public class PassThroughWriter implements RecordReceiver {
private SAMFileWriter writer;
private int reads_written = 0;
public PassThroughWriter( File f, SAMFileHeader h) {
writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(h, false, f);
@ -28,7 +29,13 @@ public class PassThroughWriter implements RecordReceiver {
public void receive(SAMRecord r) {
//To change body of implemented methods use File | Settings | File Templates.
writer.addAlignment(r);
reads_written++;
}
public void close() { writer.close() ; }
/** Returns the number of reads that were so far received by this writer.
*
*/
public int getNumReadsReceived() { return reads_written; }
}

View File

@ -34,6 +34,9 @@ public class PileBuilder implements RecordPileReceiver {
private int total_reads_in_improved = 0;
private int total_reads_in_failed = 0;
private int total_alignments_modified = 0;
private int total_reads_received = 0;
private int total_reads_written = 0;
public final static int SILENT = 0;
public final static int PILESUMMARY = 1;
@ -115,10 +118,22 @@ public class PileBuilder implements RecordPileReceiver {
reference_start = -1;
}
/** Returns the number of reads that were so far received by this writer.
*
*/
public int getNumReadsReceived() { return total_reads_received; }
/** Returns the number of reads that were so far written by this writer (NOT sent
* into its secondary "failed mode" receiver!)
*
*/
public int getNumReadsWritten() { return total_reads_written; }
public void receive(Collection<SAMRecord> c) {
//TODO: if read starts/ends with an indel (insertion, actually), we detect this as a "different" indel introduced during cleanup.
processed_piles++;
total_reads_received += c.size();
IndexedSequence[] seqs = new IndexedSequence[c.size()];
int i = 0;
@ -357,7 +372,7 @@ public class PileBuilder implements RecordPileReceiver {
// System.out.println("writing " + id);
samWriter.addAlignment(r);
total_reads_written++;
}
}
@ -619,12 +634,9 @@ public class PileBuilder implements RecordPileReceiver {
}
public double averageDistanceForOffset(MultipleAlignment a1, MultipleAlignment a2, int offset) {
SelectedPair p = new SelectedPair();
double d_av = 0;
int nseq = 0;
int i1 = -1;
int i2 = -1;
for ( Integer id2 : a2 ) {
SelectedPair spo = averageDistanceForOffset(a1,id2,offset+a2.getOffsetById(id2));