Fix to guarantee that reads are emitted in sorted order, both within and between intervals
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1122 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
03f8177a53
commit
8aa3b65e7f
|
|
@ -51,6 +51,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
|
|
||||||
// we need to sort the reads ourselves because SAM headers get messed up and claim to be "unsorted" sometimes
|
// we need to sort the reads ourselves because SAM headers get messed up and claim to be "unsorted" sometimes
|
||||||
private TreeSet<ComparableSAMRecord> readsToWrite = null;
|
private TreeSet<ComparableSAMRecord> readsToWrite = null;
|
||||||
|
private TreeSet<ComparableSAMRecord> nextSetOfReadsToWrite = null;
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
|
|
||||||
|
|
@ -119,9 +120,23 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
//testCleanWithInsertion();
|
//testCleanWithInsertion();
|
||||||
|
|
||||||
if ( writer != null ) {
|
if ( writer != null ) {
|
||||||
Iterator<ComparableSAMRecord> iter = readsToWrite.iterator();
|
// Although we can guarantee that reads will be emitted in order WITHIN an interval
|
||||||
while ( iter.hasNext() )
|
// (since we sort them ourselves), we can't guarantee it BETWEEN intervals. So,
|
||||||
writer.addAlignment(iter.next().getRecord());
|
// we need to keep track of the PREVIOUS interval's reads: if they don't overlap
|
||||||
|
// with those from this interval then we can emit them; otherwise, we merge them.
|
||||||
|
if ( nextSetOfReadsToWrite != null ) {
|
||||||
|
if ( readsToWrite.size() > 0 && nextSetOfReadsToWrite.size() > 0 &&
|
||||||
|
readsToWrite.first().getRecord().getAlignmentStart() < nextSetOfReadsToWrite.last().getRecord().getAlignmentStart() ) {
|
||||||
|
nextSetOfReadsToWrite.addAll(readsToWrite);
|
||||||
|
} else {
|
||||||
|
Iterator<ComparableSAMRecord> iter = nextSetOfReadsToWrite.iterator();
|
||||||
|
while ( iter.hasNext() )
|
||||||
|
writer.addAlignment(iter.next().getRecord());
|
||||||
|
nextSetOfReadsToWrite = new TreeSet<ComparableSAMRecord>(readsToWrite);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
nextSetOfReadsToWrite = new TreeSet<ComparableSAMRecord>(readsToWrite);
|
||||||
|
}
|
||||||
readsToWrite.clear();
|
readsToWrite.clear();
|
||||||
}
|
}
|
||||||
return 1;
|
return 1;
|
||||||
|
|
@ -136,7 +151,11 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
}
|
}
|
||||||
|
|
||||||
public void onTraversalDone(Integer result) {
|
public void onTraversalDone(Integer result) {
|
||||||
out.println("Saw " + result + " intervals");
|
if ( nextSetOfReadsToWrite != null ) {
|
||||||
|
Iterator<ComparableSAMRecord> iter = nextSetOfReadsToWrite.iterator();
|
||||||
|
while ( iter.hasNext() )
|
||||||
|
writer.addAlignment(iter.next().getRecord());
|
||||||
|
}
|
||||||
if ( writer != null ) {
|
if ( writer != null ) {
|
||||||
writer.close();
|
writer.close();
|
||||||
}
|
}
|
||||||
|
|
@ -161,6 +180,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
logger.error("Failed to close "+OUT_SNPS+" gracefully. Data may be corrupt.");
|
logger.error("Failed to close "+OUT_SNPS+" gracefully. Data may be corrupt.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
out.println("Saw " + result + " intervals");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue