From 17dbe9a95dd1f638d050a4dc9cf41227fdf562c1 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 30 Jan 2012 21:37:02 -0500 Subject: [PATCH] A few cleanups in the LocusIteratorByState * No more N's in the extended event pileups * Only add to the pileup MQ0 counter if the read actually goes into the pileup --- .../gatk/iterators/LocusIteratorByState.java | 71 ++++++++++--------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index 2257cc139..34ac17f49 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -74,16 +74,16 @@ public class LocusIteratorByState extends LocusIterator { static private class SAMRecordState { SAMRecord read; - int readOffset = -1; // how far are we offset from the start of the read bases? - int genomeOffset = -1; // how far are we offset from the alignment start on the genome? + int readOffset = -1; // how far are we offset from the start of the read bases? + int genomeOffset = -1; // how far are we offset from the alignment start on the genome? Cigar cigar = null; int cigarOffset = -1; CigarElement curElement = null; int nCigarElements = 0; - // how far are we into a single cigarElement - int cigarElementCounter = -1; + + int cigarElementCounter = -1; // how far are we into a single cigarElement // The logical model for generating extended events is as follows: the "record state" implements the traversal // along the reference; thus stepForwardOnGenome() returns on every and only on actual reference bases. This @@ -93,19 +93,19 @@ public class LocusIteratorByState extends LocusIterator { // stepForwardOnGenome(). The next call to stepForwardOnGenome() will clear that memory (as we remember only extended // events immediately preceding the current reference base). - boolean generateExtendedEvents = true; // should we generate an additional, special pile for indels between the ref bases? - // the only purpose of this flag is to shield away a few additional lines of code - // when extended piles are not needed, it may not be even worth it... + boolean generateExtendedEvents = true; // should we generate an additional, special pile for indels between the ref bases? + // the only purpose of this flag is to shield away a few additional lines of code + // when extended piles are not needed, it may not be even worth it... - byte[] insertedBases = null; // remember full inserted sequence if we are generating piles of extended events (indels) - int eventLength = -1; // will be set to the length of insertion/deletion if we are generating piles of extended events - byte eventDelayedFlag = 0; // will be set to non-0 if there was an event (indel) right before the - // current base on the ref. We use a counter-like variable here since clearing the indel event is - // delayed by one base, so we need to remember how long ago we have seen the actual event + byte[] insertedBases = null; // remember full inserted sequence if we are generating piles of extended events (indels) + int eventLength = -1; // will be set to the length of insertion/deletion if we are generating piles of extended events + byte eventDelayedFlag = 0; // will be set to non-0 if there was an event (indel) right before the + // current base on the ref. We use a counter-like variable here since clearing the indel event is + // delayed by one base, so we need to remember how long ago we have seen the actual event - int eventStart = -1; // where on the read the extended event starts (i.e. the last position on the read prior to the - // event, or -1 if alignment starts with an insertion); this one is easy to recompute on the fly, - // we cache it here mainly for convenience + int eventStart = -1; // where on the read the extended event starts (i.e. the last position on the read prior to the + // event, or -1 if alignment starts with an insertion); this one is easy to recompute on the fly, + // we cache it here mainly for convenience public SAMRecordState(SAMRecord read, boolean extended) { @@ -241,6 +241,8 @@ public class LocusIteratorByState extends LocusIterator { readOffset += curElement.getLength(); break; case D: // deletion w.r.t. the reference + if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string + throw new UserException.MalformedBAM(read, "Read starting with deletion. Cigar: " + read.getCigarString()); if (generateExtendedEvents) { if (cigarElementCounter == 1) { // generate an extended event only if we just stepped into the deletion (i.e. don't @@ -403,9 +405,9 @@ public class LocusIteratorByState extends LocusIterator { final int eventStartOffset = state.getReadEventStartOffset(); // this will be -1 if base is not a deletion, or if base is the first deletion in the event. Otherwise, it will give the last base before the deletion began. final int eventLength = state.getEventLength(); -// if (op != CigarOperator.N) // N's are never added to any pileup -// continue; -// + if (op == CigarOperator.N) // N's are never added to any pileup + continue; + if (state.hadIndel()) { // this read has an indel associated with the previous position on the ref size++; ExtendedEventPileupElement pileupElement; @@ -413,27 +415,26 @@ public class LocusIteratorByState extends LocusIterator { nDeletions++; maxDeletionLength = Math.max(maxDeletionLength, state.getEventLength()); pileupElement = new ExtendedEventPileupElement(read, eventStartOffset, eventLength); - } + } else { // Insertion event nInsertions++; pileupElement = new ExtendedEventPileupElement(read, eventStartOffset, eventLength, state.getEventBases()); } + if (read.getMappingQuality() == 0) + nMQ0Reads++; indelPile.add(pileupElement); } - // this read has no indel associated with the previous position on the ref. Criteria to include in the pileup are: - // we only add reads that are not N's - // we only include deletions to the pileup if the walker requests it - else if ( (op != CigarOperator.N) && (op != CigarOperator.D || readInfo.includeReadsWithDeletionAtLoci())) { + // this read has no indel so add it to the pileup as a NOEVENT: + // a deletion that didn't start here (therefore, not an extended event) + // we add (mis)matches as no events. + else if (op != CigarOperator.D || readInfo.includeReadsWithDeletionAtLoci()) { size++; indelPile.add(new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), readOffset)); + if (read.getMappingQuality() == 0) + nMQ0Reads++; } - - - if (state.getRead().getMappingQuality() == 0) - nMQ0Reads++; - } if (indelPile.size() != 0) @@ -461,25 +462,25 @@ public class LocusIteratorByState extends LocusIterator { final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator final CigarOperator nextOp = state.peekForwardOnGenome(); // next cigar operator final int readOffset = state.getReadOffset(); // the base offset on this read - final int eventStartOffset = state.getReadEventStartOffset(); // this will be -1 if base is not a deletion, or if base is the first deletion in the event. Otherwise, it will give the last base before the deletion began. if (op == CigarOperator.N) // N's are never added to any pileup continue; - if (read.getMappingQuality() == 0) - nMQ0Reads++; - if (op == CigarOperator.D) { if (readInfo.includeReadsWithDeletionAtLoci()) { // only add deletions to the pileup if we are authorized to do so - int leftAlignedStart = (eventStartOffset < 0) ? readOffset : eventStartOffset; - pile.add(new PileupElement(read, leftAlignedStart, true, nextOp == CigarOperator.I, false)); + pile.add(new PileupElement(read, readOffset, true, nextOp == CigarOperator.I, false)); size++; nDeletions++; + if (read.getMappingQuality() == 0) + nMQ0Reads++; } - } else { + } + else { if (!filterBaseInRead(read, location.getStart())) { pile.add(new PileupElement(read, readOffset, false, nextOp == CigarOperator.I, op == CigarOperator.S)); size++; + if (read.getMappingQuality() == 0) + nMQ0Reads++; } } }