From 8aa3b65e7f676eb23f8746599055ed807ab593ea Mon Sep 17 00:00:00 2001 From: ebanks Date: Tue, 30 Jun 2009 13:48:41 +0000 Subject: [PATCH] fix to guarantee emission in sorted order git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1122 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/indels/IntervalCleanerWalker.java | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java index a602d00dc..562b01459 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java @@ -51,6 +51,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker // we need to sort the reads ourselves because SAM headers get messed up and claim to be "unsorted" sometimes private TreeSet readsToWrite = null; + private TreeSet nextSetOfReadsToWrite = null; public void initialize() { @@ -119,9 +120,23 @@ public class IntervalCleanerWalker extends LocusWindowWalker //testCleanWithInsertion(); if ( writer != null ) { - Iterator iter = readsToWrite.iterator(); - while ( iter.hasNext() ) - writer.addAlignment(iter.next().getRecord()); + // Although we can guarantee that reads will be emitted in order WITHIN an interval + // (since we sort them ourselves), we can't guarantee it BETWEEN intervals. So, + // we need to keep track of the PREVIOUS interval's reads: if they don't overlap + // with those from this interval then we can emit them; otherwise, we merge them. + if ( nextSetOfReadsToWrite != null ) { + if ( readsToWrite.size() > 0 && nextSetOfReadsToWrite.size() > 0 && + readsToWrite.first().getRecord().getAlignmentStart() < nextSetOfReadsToWrite.last().getRecord().getAlignmentStart() ) { + nextSetOfReadsToWrite.addAll(readsToWrite); + } else { + Iterator iter = nextSetOfReadsToWrite.iterator(); + while ( iter.hasNext() ) + writer.addAlignment(iter.next().getRecord()); + nextSetOfReadsToWrite = new TreeSet(readsToWrite); + } + } else { + nextSetOfReadsToWrite = new TreeSet(readsToWrite); + } readsToWrite.clear(); } return 1; @@ -136,7 +151,11 @@ public class IntervalCleanerWalker extends LocusWindowWalker } public void onTraversalDone(Integer result) { - out.println("Saw " + result + " intervals"); + if ( nextSetOfReadsToWrite != null ) { + Iterator iter = nextSetOfReadsToWrite.iterator(); + while ( iter.hasNext() ) + writer.addAlignment(iter.next().getRecord()); + } if ( writer != null ) { writer.close(); } @@ -161,6 +180,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker logger.error("Failed to close "+OUT_SNPS+" gracefully. Data may be corrupt."); } } + out.println("Saw " + result + " intervals"); }