From 17dbe9a95dd1f638d050a4dc9cf41227fdf562c1 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 30 Jan 2012 21:37:02 -0500 Subject: [PATCH 2/6] A few cleanups in the LocusIteratorByState * No more N's in the extended event pileups * Only add to the pileup MQ0 counter if the read actually goes into the pileup --- .../gatk/iterators/LocusIteratorByState.java | 71 ++++++++++--------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index 2257cc139..34ac17f49 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -74,16 +74,16 @@ public class LocusIteratorByState extends LocusIterator { static private class SAMRecordState { SAMRecord read; - int readOffset = -1; // how far are we offset from the start of the read bases? - int genomeOffset = -1; // how far are we offset from the alignment start on the genome? + int readOffset = -1; // how far are we offset from the start of the read bases? + int genomeOffset = -1; // how far are we offset from the alignment start on the genome? Cigar cigar = null; int cigarOffset = -1; CigarElement curElement = null; int nCigarElements = 0; - // how far are we into a single cigarElement - int cigarElementCounter = -1; + + int cigarElementCounter = -1; // how far are we into a single cigarElement // The logical model for generating extended events is as follows: the "record state" implements the traversal // along the reference; thus stepForwardOnGenome() returns on every and only on actual reference bases. This @@ -93,19 +93,19 @@ public class LocusIteratorByState extends LocusIterator { // stepForwardOnGenome(). The next call to stepForwardOnGenome() will clear that memory (as we remember only extended // events immediately preceding the current reference base). - boolean generateExtendedEvents = true; // should we generate an additional, special pile for indels between the ref bases? - // the only purpose of this flag is to shield away a few additional lines of code - // when extended piles are not needed, it may not be even worth it... + boolean generateExtendedEvents = true; // should we generate an additional, special pile for indels between the ref bases? + // the only purpose of this flag is to shield away a few additional lines of code + // when extended piles are not needed, it may not be even worth it... - byte[] insertedBases = null; // remember full inserted sequence if we are generating piles of extended events (indels) - int eventLength = -1; // will be set to the length of insertion/deletion if we are generating piles of extended events - byte eventDelayedFlag = 0; // will be set to non-0 if there was an event (indel) right before the - // current base on the ref. We use a counter-like variable here since clearing the indel event is - // delayed by one base, so we need to remember how long ago we have seen the actual event + byte[] insertedBases = null; // remember full inserted sequence if we are generating piles of extended events (indels) + int eventLength = -1; // will be set to the length of insertion/deletion if we are generating piles of extended events + byte eventDelayedFlag = 0; // will be set to non-0 if there was an event (indel) right before the + // current base on the ref. We use a counter-like variable here since clearing the indel event is + // delayed by one base, so we need to remember how long ago we have seen the actual event - int eventStart = -1; // where on the read the extended event starts (i.e. the last position on the read prior to the - // event, or -1 if alignment starts with an insertion); this one is easy to recompute on the fly, - // we cache it here mainly for convenience + int eventStart = -1; // where on the read the extended event starts (i.e. the last position on the read prior to the + // event, or -1 if alignment starts with an insertion); this one is easy to recompute on the fly, + // we cache it here mainly for convenience public SAMRecordState(SAMRecord read, boolean extended) { @@ -241,6 +241,8 @@ public class LocusIteratorByState extends LocusIterator { readOffset += curElement.getLength(); break; case D: // deletion w.r.t. the reference + if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string + throw new UserException.MalformedBAM(read, "Read starting with deletion. Cigar: " + read.getCigarString()); if (generateExtendedEvents) { if (cigarElementCounter == 1) { // generate an extended event only if we just stepped into the deletion (i.e. don't @@ -403,9 +405,9 @@ public class LocusIteratorByState extends LocusIterator { final int eventStartOffset = state.getReadEventStartOffset(); // this will be -1 if base is not a deletion, or if base is the first deletion in the event. Otherwise, it will give the last base before the deletion began. final int eventLength = state.getEventLength(); -// if (op != CigarOperator.N) // N's are never added to any pileup -// continue; -// + if (op == CigarOperator.N) // N's are never added to any pileup + continue; + if (state.hadIndel()) { // this read has an indel associated with the previous position on the ref size++; ExtendedEventPileupElement pileupElement; @@ -413,27 +415,26 @@ public class LocusIteratorByState extends LocusIterator { nDeletions++; maxDeletionLength = Math.max(maxDeletionLength, state.getEventLength()); pileupElement = new ExtendedEventPileupElement(read, eventStartOffset, eventLength); - } + } else { // Insertion event nInsertions++; pileupElement = new ExtendedEventPileupElement(read, eventStartOffset, eventLength, state.getEventBases()); } + if (read.getMappingQuality() == 0) + nMQ0Reads++; indelPile.add(pileupElement); } - // this read has no indel associated with the previous position on the ref. Criteria to include in the pileup are: - // we only add reads that are not N's - // we only include deletions to the pileup if the walker requests it - else if ( (op != CigarOperator.N) && (op != CigarOperator.D || readInfo.includeReadsWithDeletionAtLoci())) { + // this read has no indel so add it to the pileup as a NOEVENT: + // a deletion that didn't start here (therefore, not an extended event) + // we add (mis)matches as no events. + else if (op != CigarOperator.D || readInfo.includeReadsWithDeletionAtLoci()) { size++; indelPile.add(new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), readOffset)); + if (read.getMappingQuality() == 0) + nMQ0Reads++; } - - - if (state.getRead().getMappingQuality() == 0) - nMQ0Reads++; - } if (indelPile.size() != 0) @@ -461,25 +462,25 @@ public class LocusIteratorByState extends LocusIterator { final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator final CigarOperator nextOp = state.peekForwardOnGenome(); // next cigar operator final int readOffset = state.getReadOffset(); // the base offset on this read - final int eventStartOffset = state.getReadEventStartOffset(); // this will be -1 if base is not a deletion, or if base is the first deletion in the event. Otherwise, it will give the last base before the deletion began. if (op == CigarOperator.N) // N's are never added to any pileup continue; - if (read.getMappingQuality() == 0) - nMQ0Reads++; - if (op == CigarOperator.D) { if (readInfo.includeReadsWithDeletionAtLoci()) { // only add deletions to the pileup if we are authorized to do so - int leftAlignedStart = (eventStartOffset < 0) ? readOffset : eventStartOffset; - pile.add(new PileupElement(read, leftAlignedStart, true, nextOp == CigarOperator.I, false)); + pile.add(new PileupElement(read, readOffset, true, nextOp == CigarOperator.I, false)); size++; nDeletions++; + if (read.getMappingQuality() == 0) + nMQ0Reads++; } - } else { + } + else { if (!filterBaseInRead(read, location.getStart())) { pile.add(new PileupElement(read, readOffset, false, nextOp == CigarOperator.I, op == CigarOperator.S)); size++; + if (read.getMappingQuality() == 0) + nMQ0Reads++; } } } From a630db1703bd30b2258149fc9a00c7c4f4a88531 Mon Sep 17 00:00:00 2001 From: Matt Hanna Date: Tue, 31 Jan 2012 11:58:21 -0500 Subject: [PATCH 6/6] Oops...HierarchicalMicroScheduler was transforming any exception from the walker level into a ReviewedStingException. Thanks to Ryan for pointing this out. --- .../gatk/executive/HierarchicalMicroScheduler.java | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index eec440820..433c7d82f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -11,6 +11,7 @@ import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor; import java.util.Collection; @@ -101,7 +102,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar while (isShardTraversePending() || isTreeReducePending()) { // Check for errors during execution. if(hasTraversalErrorOccurred()) - throw new ReviewedStingException("An error has occurred during the traversal.",getTraversalError()); + throw getTraversalError(); // Too many files sitting around taking up space? Merge them. if (isMergeLimitExceeded()) @@ -344,10 +345,15 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar return error != null; } - private synchronized Throwable getTraversalError() { + private synchronized StingException getTraversalError() { if(!hasTraversalErrorOccurred()) throw new ReviewedStingException("User has attempted to retrieve a traversal error when none exists"); - return error; + + // If the error is already a StingException, pass it along as is. Otherwise, wrap it. + if(error instanceof StingException) + return (StingException)error; + else + return new ReviewedStingException("An error occurred during the traversal.",error); } /**