From 5d28cbda2762420632d864abcfb39a9f3b9048bf Mon Sep 17 00:00:00 2001 From: ebanks Date: Fri, 25 Feb 2011 05:18:30 +0000 Subject: [PATCH] When crossing contigs it's crucial that the queue get flushed or else it will continue to accumulate reads without emitting. This is the last time I trust someone when they tell me that they are 'confident there are no bugs' in a tool. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5315 348d0f76-0448-11de-a6fe-93d51630548a --- .../ConstrainedMateFixingSAMFileWriter.java | 55 +++++++++---------- 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/java/src/org/broadinstitute/sting/utils/sam/ConstrainedMateFixingSAMFileWriter.java b/java/src/org/broadinstitute/sting/utils/sam/ConstrainedMateFixingSAMFileWriter.java index b190f927a..131229fa4 100644 --- a/java/src/org/broadinstitute/sting/utils/sam/ConstrainedMateFixingSAMFileWriter.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ConstrainedMateFixingSAMFileWriter.java @@ -4,6 +4,7 @@ import net.sf.picard.sam.SamPairUtil; import net.sf.samtools.*; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +//import org.broadinstitute.sting.utils.SimpleTimer; import java.io.File; import java.util.*; @@ -104,9 +105,12 @@ public class ConstrainedMateFixingSAMFileWriter implements SAMFileWriter { /** read.name -> records */ HashMap forMateMatching = new HashMap(); - //Queue waitingReads = new LinkedList(); Queue waitingReads = new PriorityQueue(1000, comparer); + //private SimpleTimer timer = new SimpleTimer("ConstrainedWriter"); + //private long PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds + //private long lastProgressPrintTime = -1; // When was the last time we printed progress log? + /** * @@ -127,6 +131,9 @@ public class ConstrainedMateFixingSAMFileWriter implements SAMFileWriter { final int maxInsertSizeForMovingReadPairs) { this.finalDestination = finalDestination; this.maxInsertSizeForMovingReadPairs = maxInsertSizeForMovingReadPairs; + + //timer.start(); + //lastProgressPrintTime = timer.currentTime(); } public int getMaxReadsInQueue() { return maxReadsInQueue; } @@ -144,33 +151,6 @@ public class ConstrainedMateFixingSAMFileWriter implements SAMFileWriter { return pos + 2 * MAX_POS_MOVE_ALLOWED < addedRead.getAlignmentStart(); } -// private void verifyOrdering() { -// SAMRecord lastRead = null; -// List reads = new ArrayList(); -// -// reads.addAll(waitingReads); -// Collections.sort(reads, comparer); -// for ( SAMRecord read : reads ) { -// logger.info("READ is " + read.getReadName() + " pos " + read.getAlignmentStart()); -// if ( lastRead != null && comparer.fileOrderCompare(lastRead, read) > 0 ) -// throw new ReviewedStingException("BUG: records added out of order: read1=" + lastRead + -// ", pos=" + lastRead.getAlignmentStart() + " read2="+read + ", pos=" + read.getAlignmentStart()); -// lastRead = read; -// } -// -//// List reads = new ArrayList(); -//// while ( waitingReads.peek() != null ) { -//// SAMRecord read = waitingReads.poll(); -//// logger.info("READ is " + read.getReadName() + " pos " + read.getAlignmentStart()); -//// if ( lastRead != null && comparer.fileOrderCompare(lastRead, read) > 0 ) -//// throw new ReviewedStingException("BUG: records added out of order: read1=" + lastRead + -//// ", pos=" + lastRead.getAlignmentStart() + " read2="+read + ", pos=" + read.getAlignmentStart()); -//// lastRead = read; -//// reads.add(read); -//// } -// -// for ( SAMRecord read : reads ) waitingReads.add(read); -// } /** * @{inheritDoc} @@ -178,6 +158,24 @@ public class ConstrainedMateFixingSAMFileWriter implements SAMFileWriter { public void addAlignment( SAMRecord newRead ) { if ( DEBUG ) logger.info("New read pos " + newRead.getAlignmentStart()); + //final long curTime = timer.currentTime(); + //if ( curTime - lastProgressPrintTime > PROGRESS_PRINT_FREQUENCY ) { + // lastProgressPrintTime = curTime; + // System.out.println("WaitingReads.size = " + waitingReads.size() + ", forMateMatching.size = " + forMateMatching.size()); + //} + + // if the new read is on a different contig, then we need to flush the queue and clear the map + if ( waitingReads.size() > 0 && waitingReads.peek().getReferenceIndex() != newRead.getReferenceIndex()) { + if ( DEBUG ) logger.warn("Flushing queue on move to new contig: " + newRead.getReferenceName()); + + while ( ! waitingReads.isEmpty() ) { + // emit to disk + finalDestination.addAlignment(waitingReads.remove()); + } + + forMateMatching.clear(); + } + // fix mates, as needed // Since setMateInfo can move reads, we potentially need to remove the mate, and requeue // it to ensure proper sorting @@ -206,7 +204,6 @@ public class ConstrainedMateFixingSAMFileWriter implements SAMFileWriter { maxReadsInQueue = Math.max(maxReadsInQueue, waitingReads.size()); if ( ++counter % EMIT_FREQUENCY == 0 ) { - //verifyOrdering(); while ( ! waitingReads.isEmpty() ) { // there's something in the queue SAMRecord read = waitingReads.peek();