fix a few related issues when not all the reads were written into the output files. now cleaned output still contains all reads either with modified alignments or untouched
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@444 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0d324354ae
commit
240eb18564
|
|
@ -91,8 +91,6 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
|
|
||||||
private boolean controlRun = false;
|
private boolean controlRun = false;
|
||||||
|
|
||||||
private String referenceSequence;
|
|
||||||
|
|
||||||
public String memStatsString() {
|
public String memStatsString() {
|
||||||
String s = "mRecordPile: ";
|
String s = "mRecordPile: ";
|
||||||
return s+mRecordPile.size() + " mAllIndels: "+mAllIndels.size() + " mLastContig=" +mLastContig + " mLastStartOnref="+mLastStartOnRef;
|
return s+mRecordPile.size() + " mAllIndels: "+mAllIndels.size() + " mLastContig=" +mLastContig + " mLastStartOnref="+mLastStartOnRef;
|
||||||
|
|
@ -115,7 +113,6 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
}
|
}
|
||||||
defaultReceiver = rr;
|
defaultReceiver = rr;
|
||||||
indelPileReceiver = rp;
|
indelPileReceiver = rp;
|
||||||
referenceSequence = null;
|
|
||||||
setWaitState();
|
setWaitState();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -123,6 +120,7 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
* Does not emit records, just clears/resets the variables.
|
* Does not emit records, just clears/resets the variables.
|
||||||
*/
|
*/
|
||||||
private void setWaitState() {
|
private void setWaitState() {
|
||||||
|
|
||||||
mRecordPile.clear();
|
mRecordPile.clear();
|
||||||
mAllIndels.clear();
|
mAllIndels.clear();
|
||||||
// mIndelRegionStart = 1000000000;
|
// mIndelRegionStart = 1000000000;
|
||||||
|
|
@ -133,9 +131,6 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
|
|
||||||
public void setControlRun(boolean c) { controlRun = c; }
|
public void setControlRun(boolean c) { controlRun = c; }
|
||||||
|
|
||||||
public void setReferenceSequence(String contig) {
|
|
||||||
referenceSequence = contig;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** A utility method: emits into nonindelReceiver and purges from the currently held SAM record pile
|
/** A utility method: emits into nonindelReceiver and purges from the currently held SAM record pile
|
||||||
* all the consequtive records with alignment end positions less than or equal to the specified
|
* all the consequtive records with alignment end positions less than or equal to the specified
|
||||||
|
|
@ -171,6 +166,13 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** This method MUST be called when no more reads are left in order to enforce the collector to emit the current pile of reads
|
||||||
|
* it is still holding.
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
emit();
|
||||||
|
}
|
||||||
|
|
||||||
/** This is the main interface method of the collector: it receives alignments, inspects them, detects indels,
|
/** This is the main interface method of the collector: it receives alignments, inspects them, detects indels,
|
||||||
* updates and purges the read pile it keeps and emits alignments as needed.
|
* updates and purges the read pile it keeps and emits alignments as needed.
|
||||||
* Depending on the state, the following behaviors are possible
|
* Depending on the state, the following behaviors are possible
|
||||||
|
|
@ -198,7 +200,10 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
*/
|
*/
|
||||||
public void receive(final SAMRecord r) throws RuntimeException {
|
public void receive(final SAMRecord r) throws RuntimeException {
|
||||||
|
|
||||||
if ( r.getReadUnmappedFlag() ) return; // read did not align, nothing to do
|
if ( r.getReadUnmappedFlag() ) {
|
||||||
|
defaultReceiver.receive(r); // do not throw reads away even if they are of no use for us, keep them in the output bam....
|
||||||
|
return; // read did not align, nothing to do
|
||||||
|
}
|
||||||
|
|
||||||
if ( controlRun ) {
|
if ( controlRun ) {
|
||||||
defaultReceiver.receive(r);
|
defaultReceiver.receive(r);
|
||||||
|
|
@ -230,9 +235,13 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
// does nothing if alignment has no indels, otherwise adds the indels to the list and (re)sets state to 'active'
|
// does nothing if alignment has no indels, otherwise adds the indels to the list and (re)sets state to 'active'
|
||||||
extractIndelsAndUpdateState(r.getCigar(),currPos);
|
extractIndelsAndUpdateState(r.getCigar(),currPos);
|
||||||
|
|
||||||
if ( mState == ACTIVE_STATE && ( ! avoiding_region ) && ( mAllIndels.size() > 20 || mRecordPile.size() > 1000 ) ) avoiding_region = true;
|
if ( mState == ACTIVE_STATE && ( ! avoiding_region ) && ( mAllIndels.size() > 20 || mRecordPile.size() > 1000 ) ) {
|
||||||
|
avoiding_region = true;
|
||||||
|
}
|
||||||
|
|
||||||
if ( ! avoiding_region ) mRecordPile.add(r); // add new record if this is not some crazy region
|
if ( ! avoiding_region ) mRecordPile.add(r); // add new record if this is not some crazy region
|
||||||
|
else defaultReceiver.receive(r); // if we do not want to or can not deal with a region, pass reads through;
|
||||||
|
// the pile we have already collected before discovering it's a bad region will be sent through on the next call to emit()
|
||||||
|
|
||||||
mLastContig = currContig;
|
mLastContig = currContig;
|
||||||
mLastStartOnRef = currPos;
|
mLastStartOnRef = currPos;
|
||||||
|
|
@ -275,6 +284,7 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
// can be more than one pile in what we have stored. Also, we can still have gapless reads
|
// can be more than one pile in what we have stored. Also, we can still have gapless reads
|
||||||
// at the ends of the piles that do not really overlap with indel sites.
|
// at the ends of the piles that do not really overlap with indel sites.
|
||||||
|
|
||||||
|
|
||||||
if ( mAllIndels.size() == 0 ) throw new RuntimeException("Attempt to emit pile with no indels");
|
if ( mAllIndels.size() == 0 ) throw new RuntimeException("Attempt to emit pile with no indels");
|
||||||
|
|
||||||
HistogramAsNeeded(mAllIndels);
|
HistogramAsNeeded(mAllIndels);
|
||||||
|
|
@ -333,7 +343,7 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
// and can be emitted
|
// and can be emitted
|
||||||
|
|
||||||
if ( shouldAcceptForOutput(finalTrain ) ) {
|
if ( shouldAcceptForOutput(finalTrain ) ) {
|
||||||
System.out.print(mLastContig+":"+ finalTrain.get(0).getObject().getStart() + "-" +
|
System.out.print("SITE: " + mLastContig+":"+ finalTrain.get(0).getObject().getStart() + "-" +
|
||||||
finalTrain.get(finalTrain.size()-1).getObject().getStop() + " " +
|
finalTrain.get(finalTrain.size()-1).getObject().getStop() + " " +
|
||||||
finalTrain.size() + " indels; ");
|
finalTrain.size() + " indels; ");
|
||||||
System.out.print(finalPile.size() + " reads in the pile;") ;
|
System.out.print(finalPile.size() + " reads in the pile;") ;
|
||||||
|
|
@ -351,6 +361,11 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
||||||
// with building the indel train
|
// with building the indel train
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// we may still have reads in the original pile that start after the last indel:
|
||||||
|
for ( SAMRecord r : mRecordPile ) {
|
||||||
|
defaultReceiver.receive(r);
|
||||||
|
}
|
||||||
|
|
||||||
setWaitState();
|
setWaitState();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ import java.io.File;
|
||||||
*/
|
*/
|
||||||
public class PassThroughWriter implements RecordReceiver {
|
public class PassThroughWriter implements RecordReceiver {
|
||||||
private SAMFileWriter writer;
|
private SAMFileWriter writer;
|
||||||
|
private int reads_written = 0;
|
||||||
|
|
||||||
public PassThroughWriter( File f, SAMFileHeader h) {
|
public PassThroughWriter( File f, SAMFileHeader h) {
|
||||||
writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(h, false, f);
|
writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(h, false, f);
|
||||||
|
|
@ -28,7 +29,13 @@ public class PassThroughWriter implements RecordReceiver {
|
||||||
public void receive(SAMRecord r) {
|
public void receive(SAMRecord r) {
|
||||||
//To change body of implemented methods use File | Settings | File Templates.
|
//To change body of implemented methods use File | Settings | File Templates.
|
||||||
writer.addAlignment(r);
|
writer.addAlignment(r);
|
||||||
|
reads_written++;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() { writer.close() ; }
|
public void close() { writer.close() ; }
|
||||||
|
|
||||||
|
/** Returns the number of reads that were so far received by this writer.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public int getNumReadsReceived() { return reads_written; }
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,9 @@ public class PileBuilder implements RecordPileReceiver {
|
||||||
private int total_reads_in_failed = 0;
|
private int total_reads_in_failed = 0;
|
||||||
private int total_alignments_modified = 0;
|
private int total_alignments_modified = 0;
|
||||||
|
|
||||||
|
private int total_reads_received = 0;
|
||||||
|
private int total_reads_written = 0;
|
||||||
|
|
||||||
public final static int SILENT = 0;
|
public final static int SILENT = 0;
|
||||||
public final static int PILESUMMARY = 1;
|
public final static int PILESUMMARY = 1;
|
||||||
public final static int ALIGNMENTS = 2;
|
public final static int ALIGNMENTS = 2;
|
||||||
|
|
@ -115,10 +118,22 @@ public class PileBuilder implements RecordPileReceiver {
|
||||||
reference_start = -1;
|
reference_start = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns the number of reads that were so far received by this writer.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public int getNumReadsReceived() { return total_reads_received; }
|
||||||
|
|
||||||
|
/** Returns the number of reads that were so far written by this writer (NOT sent
|
||||||
|
* into its secondary "failed mode" receiver!)
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public int getNumReadsWritten() { return total_reads_written; }
|
||||||
|
|
||||||
public void receive(Collection<SAMRecord> c) {
|
public void receive(Collection<SAMRecord> c) {
|
||||||
|
|
||||||
//TODO: if read starts/ends with an indel (insertion, actually), we detect this as a "different" indel introduced during cleanup.
|
//TODO: if read starts/ends with an indel (insertion, actually), we detect this as a "different" indel introduced during cleanup.
|
||||||
processed_piles++;
|
processed_piles++;
|
||||||
|
total_reads_received += c.size();
|
||||||
|
|
||||||
IndexedSequence[] seqs = new IndexedSequence[c.size()];
|
IndexedSequence[] seqs = new IndexedSequence[c.size()];
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|
@ -357,7 +372,7 @@ public class PileBuilder implements RecordPileReceiver {
|
||||||
|
|
||||||
// System.out.println("writing " + id);
|
// System.out.println("writing " + id);
|
||||||
samWriter.addAlignment(r);
|
samWriter.addAlignment(r);
|
||||||
|
total_reads_written++;
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -619,12 +634,9 @@ public class PileBuilder implements RecordPileReceiver {
|
||||||
}
|
}
|
||||||
|
|
||||||
public double averageDistanceForOffset(MultipleAlignment a1, MultipleAlignment a2, int offset) {
|
public double averageDistanceForOffset(MultipleAlignment a1, MultipleAlignment a2, int offset) {
|
||||||
SelectedPair p = new SelectedPair();
|
|
||||||
|
|
||||||
double d_av = 0;
|
double d_av = 0;
|
||||||
int nseq = 0;
|
int nseq = 0;
|
||||||
int i1 = -1;
|
|
||||||
int i2 = -1;
|
|
||||||
|
|
||||||
for ( Integer id2 : a2 ) {
|
for ( Integer id2 : a2 ) {
|
||||||
SelectedPair spo = averageDistanceForOffset(a1,id2,offset+a2.getOffsetById(id2));
|
SelectedPair spo = averageDistanceForOffset(a1,id2,offset+a2.getOffsetById(id2));
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue