diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java index a6731ee18..d1a2e7519 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java @@ -23,7 +23,7 @@ import java.util.NoSuchElementException; */ /** - * A LocusView over which the user can iterate. + * A LocusView over which the user can iterate. */ public class AllLocusView extends LocusView { @@ -47,12 +47,13 @@ public class AllLocusView extends LocusView { /** * Create a new queue of locus contexts. + * * @param provider */ - public AllLocusView(LocusShardDataProvider provider) { - super( provider ); + public AllLocusView(LocusShardDataProvider provider) { + super(provider); // Seed the state tracking members with the first possible seek position and the first possible locus context. - locusIterator = new GenomeLocusIterator(genomeLocParser,provider.getLocus()); + locusIterator = new GenomeLocusIterator(genomeLocParser, provider.getLocus()); } public boolean hasNext() { @@ -63,7 +64,7 @@ public class AllLocusView extends LocusView { public AlignmentContext next() { advance(); - if(nextPosition == null) + if (nextPosition == null) throw new NoSuchElementException("No next is available in the all locus view"); // Flag to the iterator that no data is waiting in the queue to be processed. @@ -72,7 +73,7 @@ public class AllLocusView extends LocusView { AlignmentContext currentLocus; // If actual data is present, return it. Otherwise, return empty data. - if( nextLocus != null && nextLocus.getLocation().equals(nextPosition) ) + if (nextLocus != null && nextLocus.getLocation().equals(nextPosition)) currentLocus = nextLocus; else currentLocus = createEmptyLocus(nextPosition); @@ -82,15 +83,15 @@ public class AllLocusView extends LocusView { private void advance() { // Already at the next element? Don't move forward. - if(atNextElement) + if (atNextElement) return; // Out of elements? - if(nextPosition == null && !locusIterator.hasNext()) - return; + if (nextPosition == null && !locusIterator.hasNext()) + return; // If nextLocus has been consumed, clear it out to make room for the next incoming locus. - if(nextPosition != null && nextLocus != null && !nextLocus.getLocation().isPast(nextPosition)) { + if (nextPosition != null && nextLocus != null && !nextLocus.getLocation().isPast(nextPosition)) { nextLocus = null; // Determine the next locus. The trick is that we may have more than one alignment context at the same @@ -98,9 +99,9 @@ public class AllLocusView extends LocusView { // is still at the current position, we do not increment current position and wait for next call to next() to return // that context. If we know that next context is past the current position, we are done with current // position - if(hasNextLocus()) { + if (hasNextLocus()) { nextLocus = nextLocus(); - if(nextPosition.equals(nextLocus.getLocation())) { + if (nextPosition.equals(nextLocus.getLocation())) { atNextElement = true; return; } @@ -108,7 +109,7 @@ public class AllLocusView extends LocusView { } // No elements left in queue? Clear out the position state tracker and return. - if(!locusIterator.hasNext()) { + if (!locusIterator.hasNext()) { nextPosition = null; return; } @@ -119,9 +120,9 @@ public class AllLocusView extends LocusView { // Crank the iterator to (if possible) or past the next context. Be careful not to hold a reference to nextLocus // while using the hasNextLocus() / nextLocus() machinery; this will cause us to use more memory than is optimal. - while(nextLocus == null || nextLocus.getLocation().isBefore(nextPosition)) { + while (nextLocus == null || nextLocus.getLocation().isBefore(nextPosition)) { nextLocus = null; - if(!hasNextLocus()) + if (!hasNextLocus()) break; nextLocus = nextLocus(); } @@ -129,12 +130,15 @@ public class AllLocusView extends LocusView { /** * Creates a blank locus context at the specified location. + * * @param site Site at which to create the blank locus context. * @return empty context. */ private final static List EMPTY_PILEUP_READS = Collections.emptyList(); private final static List EMPTY_PILEUP_OFFSETS = Collections.emptyList(); - private AlignmentContext createEmptyLocus( GenomeLoc site ) { - return new AlignmentContext(site,new ReadBackedPileupImpl(site, EMPTY_PILEUP_READS, EMPTY_PILEUP_OFFSETS)); + private final static List EMPTY_DELETION_STATUS = Collections.emptyList(); + + private AlignmentContext createEmptyLocus(GenomeLoc site) { + return new AlignmentContext(site, new ReadBackedPileupImpl(site, EMPTY_PILEUP_READS, EMPTY_PILEUP_OFFSETS)); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index 75e787e05..f1ffa121b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -49,9 +49,13 @@ import org.broadinstitute.sting.utils.sam.ReadUtils; import java.util.*; -/** Iterator that traverses a SAM File, accumulating information on a per-locus basis */ +/** + * Iterator that traverses a SAM File, accumulating information on a per-locus basis + */ public class LocusIteratorByState extends LocusIterator { - /** our log, which we want to capture anything from this class */ + /** + * our log, which we want to capture anything from this class + */ private static Logger logger = Logger.getLogger(LocusIteratorByState.class); // ----------------------------------------------------------------------------------------------------------------- @@ -92,12 +96,14 @@ public class LocusIteratorByState extends LocusIterator { boolean generateExtendedEvents = true; // should we generate an additional, special pile for indels between the ref bases? // the only purpose of this flag is to shield away a few additional lines of code // when extended piles are not needed, it may not be even worth it... - byte[] insertedBases = null; // remember full inserted sequence if we are generating piles of extended events (indels) - int eventLength = -1; // will be set to the length of insertion/deletion if we are generating piles of extended events - byte eventDelayedFlag = 0; // will be set to non-0 if there was an event (indel) right before the + + byte[] insertedBases = null; // remember full inserted sequence if we are generating piles of extended events (indels) + int eventLength = -1; // will be set to the length of insertion/deletion if we are generating piles of extended events + byte eventDelayedFlag = 0; // will be set to non-0 if there was an event (indel) right before the // current base on the ref. We use a counter-like variable here since clearing the indel event is // delayed by one base, so we need to remember how long ago we have seen the actual event - int eventStart = -1; // where on the read the extended event starts (i.e. the last position on the read prior to the + + int eventStart = -1; // where on the read the extended event starts (i.e. the last position on the read prior to the // event, or -1 if alignment starts with an insertion); this one is easy to recompute on the fly, // we cache it here mainly for convenience @@ -111,23 +117,31 @@ public class LocusIteratorByState extends LocusIterator { //System.out.printf("Creating a SAMRecordState: %s%n", this); } - public SAMRecord getRead() { return read; } + public SAMRecord getRead() { + return read; + } /** * What is our current offset in the read's bases that aligns us with the reference genome? * * @return */ - public int getReadOffset() { return readOffset; } + public int getReadOffset() { + return readOffset; + } /** * What is the current offset w.r.t. the alignment state that aligns us to the readOffset? * * @return */ - public int getGenomeOffset() { return genomeOffset; } + public int getGenomeOffset() { + return genomeOffset; + } - public int getGenomePosition() { return read.getAlignmentStart() + getGenomeOffset(); } + public int getGenomePosition() { + return read.getAlignmentStart() + getGenomeOffset(); + } public GenomeLoc getLocation(GenomeLocParser genomeLocParser) { return genomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition()); @@ -137,19 +151,26 @@ public class LocusIteratorByState extends LocusIterator { return curElement.getOperator(); } - /** Returns true if we just stepped over insertion/into a deletion prior to the last return from stepForwardOnGenome. + /** + * Returns true if we just stepped over insertion/into a deletion prior to the last return from stepForwardOnGenome. * * @return */ public boolean hadIndel() { - return ( eventLength > 0 ); + return (eventLength > 0); } - public int getEventLength() { return eventLength; } + public int getEventLength() { + return eventLength; + } - public byte[] getEventBases() { return insertedBases; } + public byte[] getEventBases() { + return insertedBases; + } - public int getReadEventStartOffset() { return eventStart; } + public int getReadEventStartOffset() { + return eventStart; + } public String toString() { return String.format("%s ro=%d go=%d co=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, cigarOffset, cigarElementCounter, curElement); @@ -160,9 +181,9 @@ public class LocusIteratorByState extends LocusIterator { // (-1 if we did not process a single base yet); this can be last matching base, or last base of an insertion - if ( curElement == null || ++cigarElementCounter > curElement.getLength() ) { + if (curElement == null || ++cigarElementCounter > curElement.getLength()) { cigarOffset++; - if ( cigarOffset < nCigarElements ) { + if (cigarOffset < nCigarElements) { curElement = cigar.getCigarElement(cigarOffset); cigarElementCounter = 0; // next line: guards against cigar elements of length 0; when new cigar element is retrieved, @@ -174,15 +195,15 @@ public class LocusIteratorByState extends LocusIterator { // current offset of this read is the next ref base after the end of the indel. This position will // model a point on the reference somewhere after the end of the read. genomeOffset++; // extended events need that. Logically, it's legal to advance the genomic offset here: - // we do step forward on the ref, and by returning null we also indicate that we are past the read end. + // we do step forward on the ref, and by returning null we also indicate that we are past the read end. - if ( generateExtendedEvents && eventDelayedFlag > 0 ) { + if (generateExtendedEvents && eventDelayedFlag > 0) { // if we had an indel right before the read ended (i.e. insertion was the last cigar element), // we keep it until next reference base; then we discard it and this will allow the LocusIterator to // finally discard this read eventDelayedFlag--; - if ( eventDelayedFlag == 0 ) { + if (eventDelayedFlag == 0) { eventLength = -1; // reset event when we are past it insertedBases = null; eventStart = -1; @@ -193,34 +214,35 @@ public class LocusIteratorByState extends LocusIterator { } } - boolean done = false; switch (curElement.getOperator()) { - case H : // ignore hard clips - case P : // ignore pads + case H: // ignore hard clips + case P: // ignore pads cigarElementCounter = curElement.getLength(); break; - case I : // insertion w.r.t. the reference - if ( generateExtendedEvents ) { + case I: // insertion w.r.t. the reference + if (generateExtendedEvents) { // we see insertions only once, when we step right onto them; the position on the read is scrolled // past the insertion right after that - if ( eventDelayedFlag > 1 ) throw new UserException.MalformedBAM(read, "Adjacent I/D events in read "+read.getReadName()); - insertedBases = Arrays.copyOfRange(read.getReadBases(),readOffset+1,readOffset+1+curElement.getLength()); - eventLength = curElement.getLength() ; + if (eventDelayedFlag > 1) + throw new UserException.MalformedBAM(read, "Adjacent I/D events in read " + read.getReadName()); + insertedBases = Arrays.copyOfRange(read.getReadBases(), readOffset + 1, readOffset + 1 + curElement.getLength()); + eventLength = curElement.getLength(); eventStart = readOffset; eventDelayedFlag = 2; // insertion causes re-entry into stepForwardOnGenome, so we set the delay to 2 // System.out.println("Inserted "+(new String (insertedBases)) +" after "+readOffset); } // continue onto the 'S' case ! - case S : // soft clip + case S: // soft clip cigarElementCounter = curElement.getLength(); readOffset += curElement.getLength(); break; - case D : // deletion w.r.t. the reference - if ( generateExtendedEvents ) { - if ( cigarElementCounter == 1) { + case D: // deletion w.r.t. the reference + if (generateExtendedEvents) { + if (cigarElementCounter == 1) { // generate an extended event only if we just stepped into the deletion (i.e. don't // generate the event at every deleted position on the ref, that's what cigarElementCounter==1 is for!) - if ( eventDelayedFlag > 1 ) throw new UserException.MalformedBAM(read, "Adjacent I/D events in read "+read.getReadName()); + if (eventDelayedFlag > 1) + throw new UserException.MalformedBAM(read, "Adjacent I/D events in read " + read.getReadName()); eventLength = curElement.getLength(); eventDelayedFlag = 2; // deletion on the ref causes an immediate return, so we have to delay by 1 only eventStart = readOffset; @@ -232,26 +254,27 @@ public class LocusIteratorByState extends LocusIterator { genomeOffset++; done = true; break; - case N : // reference skip (looks and gets processed just like a "deletion", just different logical meaning) + case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning) genomeOffset++; done = true; break; - case M : + case M: readOffset++; genomeOffset++; done = true; break; - default : throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator()); + default: + throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator()); } - if ( generateExtendedEvents ) { - if ( eventDelayedFlag > 0 && done ) { - // if we did make a successful step on the ref, decrement delayed flag. If, upon the decrementthe, + if (generateExtendedEvents) { + if (eventDelayedFlag > 0 && done) { + // if we did make a successful step on the ref, decrement delayed flag. If, upon the decrementing the, // the flag is 1, we are standing on the reference base right after the indel (so we have to keep it). // Otherwise, we are away from the previous indel and have to clear our memories... eventDelayedFlag--; // when we notice an indel, we set delayed flag to 2, so now - // if eventDelayedFlag == 1, an indel occured right before the current base - if ( eventDelayedFlag == 0 ) { + // if eventDelayedFlag == 1, an indel occured right before the current base + if (eventDelayedFlag == 0) { eventLength = -1; // reset event when we are past it insertedBases = null; eventStart = -1; @@ -274,15 +297,15 @@ public class LocusIteratorByState extends LocusIterator { // // ----------------------------------------------------------------------------------------------------------------- - public LocusIteratorByState(final Iterator samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection samples ) { + public LocusIteratorByState(final Iterator samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection samples) { this.readInfo = readInformation; this.genomeLocParser = genomeLocParser; this.samples = new ArrayList(samples); - this.readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod()); + this.readStates = new ReadStateManager(samIterator, readInformation.getDownsamplingMethod()); // currently the GATK expects this LocusIteratorByState to accept empty sample lists, when // there's no read data. So we need to throw this error only when samIterator.hasNext() is true - if ( this.samples.isEmpty() && samIterator.hasNext() ) { + if (this.samples.isEmpty() && samIterator.hasNext()) { throw new IllegalArgumentException("samples list must not be empty"); } } @@ -322,7 +345,7 @@ public class LocusIteratorByState extends LocusIterator { // ----------------------------------------------------------------------------------------------------------------- public AlignmentContext next() { lazyLoadNextAlignmentContext(); - if(!hasNext()) + if (!hasNext()) throw new NoSuchElementException("LocusIteratorByState: out of elements."); AlignmentContext currentAlignmentContext = nextAlignmentContext; nextAlignmentContext = null; @@ -334,7 +357,7 @@ public class LocusIteratorByState extends LocusIterator { * nextAlignmentContext MUST BE null in order for this method to advance to the next entry. */ private void lazyLoadNextAlignmentContext() { - while(nextAlignmentContext == null && readStates.hasNext()) { + while (nextAlignmentContext == null && readStates.hasNext()) { // this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref: readStates.collectPendingReads(); @@ -350,14 +373,14 @@ public class LocusIteratorByState extends LocusIterator { // In this case, the subsequent call to next() will emit the normal pileup at the current base // and shift the position. if (readInfo.generateExtendedEvents() && hasExtendedEvents) { - Map fullExtendedEventPileup = new HashMap(); + Map fullExtendedEventPileup = new HashMap(); // get current location on the reference and decrement it by 1: the indels we just stepped over // are associated with the *previous* reference base - GenomeLoc loc = genomeLocParser.incPos(getLocation(),-1); + GenomeLoc loc = genomeLocParser.incPos(getLocation(), -1); boolean hasBeenSampled = false; - for(final String sample: samples) { + for (final String sample : samples) { Iterator iterator = readStates.iterator(sample); List indelPile = new ArrayList(readStates.size(sample)); hasBeenSampled |= loc.getStart() <= readStates.getDownsamplingExtent(sample); @@ -368,103 +391,108 @@ public class LocusIteratorByState extends LocusIterator { nMQ0Reads = 0; int maxDeletionLength = 0; - while(iterator.hasNext()) { - SAMRecordState state = iterator.next(); - if ( state.hadIndel() ) { + while (iterator.hasNext()) { + final SAMRecordState state = iterator.next(); + final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read + final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator + final int readOffset = state.getReadOffset(); // the base offset on this read + final int eventStartOffset = state.getReadEventStartOffset(); // this will be -1 if base is not a deletion, or if base is the first deletion in the event. Otherwise, it will give the last base before the deletion began. + final int eventLength = state.getEventLength(); + +// if (op != CigarOperator.N) // N's are never added to any pileup +// continue; +// + if (state.hadIndel()) { // this read has an indel associated with the previous position on the ref size++; - if ( state.getEventBases() == null ) { + ExtendedEventPileupElement pileupElement; + if (state.getEventBases() == null) { // Deletion event nDeletions++; - maxDeletionLength = Math.max(maxDeletionLength,state.getEventLength()); + maxDeletionLength = Math.max(maxDeletionLength, state.getEventLength()); + pileupElement = new ExtendedEventPileupElement(read, eventStartOffset, eventLength); + } + else { // Insertion event + nInsertions++; + pileupElement = new ExtendedEventPileupElement(read, eventStartOffset, eventLength, state.getEventBases()); } - else nInsertions++; - indelPile.add ( new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), state.getReadEventStartOffset(), state.getEventLength(), state.getEventBases()) ); - } else { - // HACK: The readahead mechanism for LocusIteratorByState will effectively read past the current position - // and add in extra reads that start after this indel. Skip these reads. - // My belief at this moment after empirically looking at read->ref alignment is that, in a cigar string - // like 1I76M, the first insertion is between alignment start-1 and alignment start, so we shouldn't be - // filtering these out. - // TODO: UPDATE! Eric tells me that we *might* want reads adjacent to the pileup in the pileup. Strike this block. - //if(state.getRead().getAlignmentStart() > loc.getStart()) - // continue; - - if ( state.getCurrentCigarOperator() != CigarOperator.N ) { - // this read has no indel associated with the previous position on the ref; - // we count this read in only if it has actual bases, not N span... - if ( state.getCurrentCigarOperator() != CigarOperator.D || readInfo.includeReadsWithDeletionAtLoci() ) { - - // if cigar operator is D but the read has no extended event reported (that's why we ended - // up in this branch), it means that we are currently inside a deletion that started earlier; - // we count such reads (with a longer deletion spanning over a deletion at the previous base we are - // about to report) only if includeReadsWithDeletionAtLoci is true. - size++; - indelPile.add ( new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), state.getReadOffset()-1, -1) // length=-1 --> noevent - ); - } - } + indelPile.add(pileupElement); } - if ( state.getRead().getMappingQuality() == 0 ) { + + // this read has no indel associated with the previous position on the ref. Criteria to include in the pileup are: + // we only add reads that are not N's + // we only include deletions to the pileup if the walker requests it + else if ( (op != CigarOperator.N) && (op != CigarOperator.D || readInfo.includeReadsWithDeletionAtLoci())) { + size++; + indelPile.add(new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), readOffset)); + } + + + if (state.getRead().getMappingQuality() == 0) nMQ0Reads++; - } - } - if( indelPile.size() != 0 ) fullExtendedEventPileup.put(sample,new ReadBackedExtendedEventPileupImpl(loc,indelPile,size,maxDeletionLength,nInsertions,nDeletions,nMQ0Reads)); - } - hasExtendedEvents = false; // we are done with extended events prior to current ref base -// System.out.println("Indel(s) at "+loc); -// for ( ExtendedEventPileupElement pe : indelPile ) { if ( pe.isIndel() ) System.out.println(" "+pe.toString()); } - nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup), hasBeenSampled); - } else { - GenomeLoc location = getLocation(); - Map fullPileup = new HashMap(); + } + + if (indelPile.size() != 0) + fullExtendedEventPileup.put(sample, new ReadBackedExtendedEventPileupImpl(loc, indelPile, size, maxDeletionLength, nInsertions, nDeletions, nMQ0Reads)); + } + hasExtendedEvents = false; // we are done with extended events prior to current ref base + nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup), hasBeenSampled); + } + else { // this is a regular event pileup (not extended) + GenomeLoc location = getLocation(); + Map fullPileup = new HashMap(); boolean hasBeenSampled = false; - for(final String sample: samples) { + for (final String sample : samples) { Iterator iterator = readStates.iterator(sample); List pile = new ArrayList(readStates.size(sample)); hasBeenSampled |= location.getStart() <= readStates.getDownsamplingExtent(sample); - size = 0; - nDeletions = 0; - nMQ0Reads = 0; + size = 0; // number of elements in this sample's pileup + nDeletions = 0; // number of deletions in this sample's pileup + nMQ0Reads = 0; // number of MQ0 reads in this sample's pileup (warning: current implementation includes N bases that are MQ0) - while(iterator.hasNext()) { - SAMRecordState state = iterator.next(); - if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) { - if ( filterBaseInRead((GATKSAMRecord) state.getRead(), location.getStart()) ) { - //discarded_bases++; - //printStatus("Adaptor bases", discarded_adaptor_bases); - continue; - } else { - //observed_bases++; - pile.add(new PileupElement((GATKSAMRecord) state.getRead(), state.getReadOffset())); + while (iterator.hasNext()) { + final SAMRecordState state = iterator.next(); // state object with the read/offset information + final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read + final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator + final int readOffset = state.getReadOffset(); // the base offset on this read + final int eventStartOffset = state.getReadEventStartOffset(); // this will be -1 if base is not a deletion, or if base is the first deletion in the event. Otherwise, it will give the last base before the deletion began. + + if (op == CigarOperator.N) // N's are never added to any pileup + continue; + + if (read.getMappingQuality() == 0) + nMQ0Reads++; + + if (op == CigarOperator.D) { + if (readInfo.includeReadsWithDeletionAtLoci()) { // only add deletions to the pileup if we are authorized to do so + int leftAlignedStart = (eventStartOffset < 0) ? readOffset : eventStartOffset; + pile.add(new PileupElement(read, leftAlignedStart, true)); + size++; + nDeletions++; + } + } else { + if (!filterBaseInRead(read, location.getStart())) { + pile.add(new PileupElement(read, readOffset, false)); size++; } - } else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) { - size++; - pile.add(new PileupElement((GATKSAMRecord) state.getRead(), -1)); - nDeletions++; - } - - if ( state.getRead().getMappingQuality() == 0 ) { - nMQ0Reads++; } } - if( pile.size() != 0 ) - fullPileup.put(sample,new ReadBackedPileupImpl(location,pile,size,nDeletions,nMQ0Reads)); + if (pile.size() != 0) // if this pileup added at least one base, add it to the full pileup + fullPileup.put(sample, new ReadBackedPileupImpl(location, pile, size, nDeletions, nMQ0Reads)); } - updateReadStates(); // critical - must be called after we get the current state offsets and location - // if we got reads with non-D/N over the current position, we are done - if ( !fullPileup.isEmpty() ) nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location,fullPileup),hasBeenSampled); + updateReadStates(); // critical - must be called after we get the current state offsets and location + if (!fullPileup.isEmpty()) // if we got reads with non-D/N over the current position, we are done + nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location, fullPileup), hasBeenSampled); } } } // fast testing of position private boolean readIsPastCurrentPosition(SAMRecord read) { - if ( readStates.isEmpty() ) + if (readStates.isEmpty()) return false; else { SAMRecordState state = readStates.getFirst(); @@ -485,20 +513,18 @@ public class LocusIteratorByState extends LocusIterator { } private void updateReadStates() { - for(final String sample: samples) { + for (final String sample : samples) { Iterator it = readStates.iterator(sample); - while ( it.hasNext() ) { + while (it.hasNext()) { SAMRecordState state = it.next(); CigarOperator op = state.stepForwardOnGenome(); - if ( state.hadIndel() && readInfo.generateExtendedEvents() ) hasExtendedEvents = true; - else { + if (state.hadIndel() && readInfo.generateExtendedEvents()) + hasExtendedEvents = true; + else if (op == null) { // we discard the read only when we are past its end AND indel at the end of the read (if any) was // already processed. Keeping the read state that retunred null upon stepForwardOnGenome() is safe // as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag. - if ( op == null ) { // we've stepped off the end of the object - //if (DEBUG) logger.debug(String.format(" removing read %s at %d", state.getRead().getReadName(), state.getRead().getAlignmentStart())); - it.remove(); - } + it.remove(); // we've stepped off the end of the object } } } @@ -508,20 +534,20 @@ public class LocusIteratorByState extends LocusIterator { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); } - private class ReadStateManager { + private class ReadStateManager { private final PeekableIterator iterator; private final DownsamplingMethod downsamplingMethod; private final SamplePartitioner samplePartitioner; - private final Map readStatesBySample = new HashMap(); + private final Map readStatesBySample = new HashMap(); private final int targetCoverage; private int totalReadStates = 0; public ReadStateManager(Iterator source, DownsamplingMethod downsamplingMethod) { this.iterator = new PeekableIterator(source); this.downsamplingMethod = downsamplingMethod.type != null ? downsamplingMethod : DownsamplingMethod.NONE; - switch(this.downsamplingMethod.type) { + switch (this.downsamplingMethod.type) { case BY_SAMPLE: - if(downsamplingMethod.toCoverage == null) + if (downsamplingMethod.toCoverage == null) throw new UserException.BadArgumentValue("dcov", "Downsampling coverage (-dcov) must be specified when downsampling by sample"); this.targetCoverage = downsamplingMethod.toCoverage; break; @@ -529,10 +555,10 @@ public class LocusIteratorByState extends LocusIterator { this.targetCoverage = Integer.MAX_VALUE; } - Map readSelectors = new HashMap(); - for(final String sample: samples) { - readStatesBySample.put(sample,new PerSampleReadStateManager()); - readSelectors.put(sample,downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null,targetCoverage) : new AllReadsSelector()); + Map readSelectors = new HashMap(); + for (final String sample : samples) { + readStatesBySample.put(sample, new PerSampleReadStateManager()); + readSelectors.put(sample, downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null, targetCoverage) : new AllReadsSelector()); } samplePartitioner = new SamplePartitioner(readSelectors); @@ -541,6 +567,7 @@ public class LocusIteratorByState extends LocusIterator { /** * Returns a iterator over all the reads associated with the given sample. Note that remove() is implemented * for this iterator; if present, total read states will be decremented. + * * @param sample The sample. * @return Iterator over the reads associated with that sample. */ @@ -569,6 +596,7 @@ public class LocusIteratorByState extends LocusIterator { /** * Retrieves the total number of reads in the manager across all samples. + * * @return Total number of reads over all samples. */ public int size() { @@ -577,6 +605,7 @@ public class LocusIteratorByState extends LocusIterator { /** * Retrieves the total number of reads in the manager in the given sample. + * * @param sample The sample. * @return Total number of reads in the given sample. */ @@ -587,6 +616,7 @@ public class LocusIteratorByState extends LocusIterator { /** * The extent of downsampling; basically, the furthest base out which has 'fallen * victim' to the downsampler. + * * @param sample Sample, downsampled independently. * @return Integer stop of the furthest undownsampled region. */ @@ -595,9 +625,9 @@ public class LocusIteratorByState extends LocusIterator { } public SAMRecordState getFirst() { - for(final String sample: samples) { + for (final String sample : samples) { PerSampleReadStateManager reads = readStatesBySample.get(sample); - if(!reads.isEmpty()) + if (!reads.isEmpty()) return reads.peek(); } return null; @@ -608,19 +638,18 @@ public class LocusIteratorByState extends LocusIterator { } public void collectPendingReads() { - if(!iterator.hasNext()) + if (!iterator.hasNext()) return; - if(readStates.size() == 0) { + if (readStates.size() == 0) { int firstContigIndex = iterator.peek().getReferenceIndex(); int firstAlignmentStart = iterator.peek().getAlignmentStart(); - while(iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) { + while (iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) { samplePartitioner.submitRead(iterator.next()); } - } - else { + } else { // Fast fail in the case that the read is past the current position. - if(readIsPastCurrentPosition(iterator.peek())) + if (readIsPastCurrentPosition(iterator.peek())) return; while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) { @@ -629,7 +658,7 @@ public class LocusIteratorByState extends LocusIterator { } samplePartitioner.complete(); - for(final String sample: samples) { + for (final String sample : samples) { ReadSelector aggregator = samplePartitioner.getSelectedReads(sample); Collection newReads = new ArrayList(aggregator.getSelectedReads()); @@ -638,21 +667,20 @@ public class LocusIteratorByState extends LocusIterator { int numReads = statesBySample.size(); int downsamplingExtent = aggregator.getDownsamplingExtent(); - if(numReads+newReads.size()<=targetCoverage || downsamplingMethod.type==DownsampleType.NONE) { + if (numReads + newReads.size() <= targetCoverage || downsamplingMethod.type == DownsampleType.NONE) { long readLimit = aggregator.getNumReadsSeen(); - addReadsToSample(statesBySample,newReads,readLimit); + addReadsToSample(statesBySample, newReads, readLimit); statesBySample.specifyNewDownsamplingExtent(downsamplingExtent); - } - else { + } else { int[] counts = statesBySample.getCountsPerAlignmentStart(); int[] updatedCounts = new int[counts.length]; - System.arraycopy(counts,0,updatedCounts,0,counts.length); + System.arraycopy(counts, 0, updatedCounts, 0, counts.length); boolean readPruned = true; - while(numReads+newReads.size()>targetCoverage && readPruned) { + while (numReads + newReads.size() > targetCoverage && readPruned) { readPruned = false; - for(int alignmentStart=updatedCounts.length-1;numReads+newReads.size()>targetCoverage&&alignmentStart>=0;alignmentStart--) { - if(updatedCounts[alignmentStart] > 1) { + for (int alignmentStart = updatedCounts.length - 1; numReads + newReads.size() > targetCoverage && alignmentStart >= 0; alignmentStart--) { + if (updatedCounts[alignmentStart] > 1) { updatedCounts[alignmentStart]--; numReads--; readPruned = true; @@ -660,7 +688,7 @@ public class LocusIteratorByState extends LocusIterator { } } - if(numReads == targetCoverage) { + if (numReads == targetCoverage) { updatedCounts[0]--; numReads--; } @@ -668,18 +696,18 @@ public class LocusIteratorByState extends LocusIterator { BitSet toPurge = new BitSet(readStates.size()); int readOffset = 0; - for(int i = 0; i < updatedCounts.length; i++) { + for (int i = 0; i < updatedCounts.length; i++) { int n = counts[i]; int k = updatedCounts[i]; - for(Integer purgedElement: MathUtils.sampleIndicesWithoutReplacement(n,n-k)) - toPurge.set(readOffset+purgedElement); + for (Integer purgedElement : MathUtils.sampleIndicesWithoutReplacement(n, n - k)) + toPurge.set(readOffset + purgedElement); readOffset += counts[i]; } - downsamplingExtent = Math.max(downsamplingExtent,statesBySample.purge(toPurge)); - - addReadsToSample(statesBySample,newReads,targetCoverage-numReads); + downsamplingExtent = Math.max(downsamplingExtent, statesBySample.purge(toPurge)); + + addReadsToSample(statesBySample, newReads, targetCoverage - numReads); statesBySample.specifyNewDownsamplingExtent(downsamplingExtent); } } @@ -688,23 +716,25 @@ public class LocusIteratorByState extends LocusIterator { /** * Add reads with the given sample name to the given hanger entry. + * * @param readStates The list of read states to add this collection of reads. - * @param reads Reads to add. Selected reads will be pulled from this source. - * @param maxReads Maximum number of reads to add. + * @param reads Reads to add. Selected reads will be pulled from this source. + * @param maxReads Maximum number of reads to add. */ private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection reads, final long maxReads) { - if(reads.isEmpty()) + if (reads.isEmpty()) return; Collection newReadStates = new LinkedList(); int readCount = 0; - for(SAMRecord read: reads) { - if(readCount < maxReads) { + for (SAMRecord read : reads) { + if (readCount < maxReads) { SAMRecordState state = new SAMRecordState(read, readInfo.generateExtendedEvents()); state.stepForwardOnGenome(); newReadStates.add(state); // TODO: What if we downsample the extended events away? - if (state.hadIndel()) hasExtendedEvents = true; + if (state.hadIndel()) + hasExtendedEvents = true; readCount++; } } @@ -735,7 +765,7 @@ public class LocusIteratorByState extends LocusIterator { } public void specifyNewDownsamplingExtent(int downsamplingExtent) { - this.downsamplingExtent = Math.max(this.downsamplingExtent,downsamplingExtent); + this.downsamplingExtent = Math.max(this.downsamplingExtent, downsamplingExtent); } public int getDownsamplingExtent() { @@ -745,7 +775,7 @@ public class LocusIteratorByState extends LocusIterator { public int[] getCountsPerAlignmentStart() { int[] counts = new int[readStateCounter.size()]; int index = 0; - for(Counter counter: readStateCounter) + for (Counter counter : readStateCounter) counts[index++] = counter.getCount(); return counts; } @@ -766,7 +796,7 @@ public class LocusIteratorByState extends LocusIterator { wrappedIterator.remove(); Counter counter = readStateCounter.peek(); counter.decrement(); - if(counter.getCount() == 0) + if (counter.getCount() == 0) readStateCounter.remove(); } }; @@ -775,13 +805,14 @@ public class LocusIteratorByState extends LocusIterator { /** * Purge the given elements from the bitset. If an element in the bitset is true, purge * the corresponding read state. + * * @param elements bits from the set to purge. * @return the extent of the final downsampled read. */ public int purge(final BitSet elements) { int downsamplingExtent = 0; - if(elements.isEmpty() || readStates.isEmpty()) return downsamplingExtent; + if (elements.isEmpty() || readStates.isEmpty()) return downsamplingExtent; Iterator readStateIterator = readStates.iterator(); @@ -794,22 +825,22 @@ public class LocusIteratorByState extends LocusIterator { int toPurge = elements.nextSetBit(0); int removedCount = 0; - while(readStateIterator.hasNext() && toPurge >= 0) { + while (readStateIterator.hasNext() && toPurge >= 0) { SAMRecordState state = readStateIterator.next(); - downsamplingExtent = Math.max(downsamplingExtent,state.getRead().getAlignmentEnd()); + downsamplingExtent = Math.max(downsamplingExtent, state.getRead().getAlignmentEnd()); - if(readIndex == toPurge) { + if (readIndex == toPurge) { readStateIterator.remove(); currentCounter.decrement(); - if(currentCounter.getCount() == 0) + if (currentCounter.getCount() == 0) counterIterator.remove(); removedCount++; - toPurge = elements.nextSetBit(toPurge+1); + toPurge = elements.nextSetBit(toPurge + 1); } readIndex++; alignmentStartCounter--; - if(alignmentStartCounter == 0 && counterIterator.hasNext()) { + if (alignmentStartCounter == 0 && counterIterator.hasNext()) { currentCounter = counterIterator.next(); alignmentStartCounter = currentCounter.getCount(); } @@ -849,12 +880,14 @@ public class LocusIteratorByState extends LocusIterator { interface ReadSelector { /** * All previous selectors in the chain have allowed this read. Submit it to this selector for consideration. + * * @param read the read to evaluate. */ public void submitRead(SAMRecord read); /** * A previous selector has deemed this read unfit. Notify this selector so that this selector's counts are valid. + * * @param read the read previously rejected. */ public void notifyReadRejected(SAMRecord read); @@ -866,12 +899,14 @@ interface ReadSelector { /** * Retrieve the number of reads seen by this selector so far. + * * @return number of reads seen. */ public long getNumReadsSeen(); /** * Return the number of reads accepted by this selector so far. + * * @return number of reads selected. */ public long getNumReadsSelected(); @@ -880,12 +915,14 @@ interface ReadSelector { * Gets the locus at which the last of the downsampled reads selected by this selector ends. The value returned will be the * last aligned position from this selection to which a downsampled read aligns -- in other words, if a read is thrown out at * position 3 whose cigar string is 76M, the value of this parameter will be 78. + * * @return If any read has been downsampled, this will return the last aligned base of the longest alignment. Else, 0. */ public int getDownsamplingExtent(); /** * Get the reads selected by this selector. + * * @return collection of reads selected by this selector. */ public Collection getSelectedReads(); @@ -911,7 +948,7 @@ class AllReadsSelector implements ReadSelector { public void notifyReadRejected(SAMRecord read) { readsSeen++; - downsamplingExtent = Math.max(downsamplingExtent,read.getAlignmentEnd()); + downsamplingExtent = Math.max(downsamplingExtent, read.getAlignmentEnd()); } public void complete() { @@ -949,18 +986,18 @@ class NRandomReadSelector implements ReadSelector { private final ReservoirDownsampler reservoir; private final ReadSelector chainedSelector; private long readsSeen = 0; - private int downsamplingExtent = 0; + private int downsamplingExtent = 0; public NRandomReadSelector(ReadSelector chainedSelector, long readLimit) { - this.reservoir = new ReservoirDownsampler((int)readLimit); + this.reservoir = new ReservoirDownsampler((int) readLimit); this.chainedSelector = chainedSelector; } public void submitRead(SAMRecord read) { SAMRecord displaced = reservoir.add(read); - if(displaced != null && chainedSelector != null) { + if (displaced != null && chainedSelector != null) { chainedSelector.notifyReadRejected(read); - downsamplingExtent = Math.max(downsamplingExtent,read.getAlignmentEnd()); + downsamplingExtent = Math.max(downsamplingExtent, read.getAlignmentEnd()); } readsSeen++; } @@ -970,9 +1007,9 @@ class NRandomReadSelector implements ReadSelector { } public void complete() { - for(SAMRecord read: reservoir.getDownsampledContents()) + for (SAMRecord read : reservoir.getDownsampledContents()) chainedSelector.submitRead(read); - if(chainedSelector != null) + if (chainedSelector != null) chainedSelector.complete(); } @@ -987,7 +1024,7 @@ class NRandomReadSelector implements ReadSelector { public int getDownsamplingExtent() { return downsamplingExtent; - } + } public Collection getSelectedReads() { return reservoir.getDownsampledContents(); @@ -996,7 +1033,7 @@ class NRandomReadSelector implements ReadSelector { public void reset() { reservoir.clear(); downsamplingExtent = 0; - if(chainedSelector != null) + if (chainedSelector != null) chainedSelector.reset(); } } @@ -1005,23 +1042,23 @@ class NRandomReadSelector implements ReadSelector { * Note: stores reads by sample ID string, not by sample object */ class SamplePartitioner implements ReadSelector { - private final Map readsBySample; + private final Map readsBySample; private long readsSeen = 0; - public SamplePartitioner(Map readSelectors) { + public SamplePartitioner(Map readSelectors) { readsBySample = readSelectors; } public void submitRead(SAMRecord read) { - String sampleName = read.getReadGroup()!=null ? read.getReadGroup().getSample() : null; - if(readsBySample.containsKey(sampleName)) + String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null; + if (readsBySample.containsKey(sampleName)) readsBySample.get(sampleName).submitRead(read); readsSeen++; } public void notifyReadRejected(SAMRecord read) { - String sampleName = read.getReadGroup()!=null ? read.getReadGroup().getSample() : null; - if(readsBySample.containsKey(sampleName)) + String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null; + if (readsBySample.containsKey(sampleName)) readsBySample.get(sampleName).notifyReadRejected(read); readsSeen++; } @@ -1040,23 +1077,23 @@ class SamplePartitioner implements ReadSelector { public int getDownsamplingExtent() { int downsamplingExtent = 0; - for(ReadSelector storage: readsBySample.values()) - downsamplingExtent = Math.max(downsamplingExtent,storage.getDownsamplingExtent()); + for (ReadSelector storage : readsBySample.values()) + downsamplingExtent = Math.max(downsamplingExtent, storage.getDownsamplingExtent()); return downsamplingExtent; } - + public Collection getSelectedReads() { throw new UnsupportedOperationException("Cannot directly get selected reads from a read partitioner."); } public ReadSelector getSelectedReads(String sampleName) { - if(!readsBySample.containsKey(sampleName)) + if (!readsBySample.containsKey(sampleName)) throw new NoSuchElementException("Sample name not found"); return readsBySample.get(sampleName); } public void reset() { - for(ReadSelector storage: readsBySample.values()) + for (ReadSelector storage : readsBySample.values()) storage.reset(); readsSeen = 0; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java index 312b505ec..507a6559c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java @@ -25,13 +25,13 @@ public class BaseQualityRankSumTest extends RankSumTest { protected void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List refQuals, List altQuals) { for ( final PileupElement p : pileup ) { if( isUsableBase(p) ) { - if ( p.getBase() == ref ) { + if ( p.getBase() == ref ) refQuals.add((double)p.getQual()); - } else if ( p.getBase() == alt ) { + else if ( p.getBase() == alt ) altQuals.add((double)p.getQual()); - } } } + } protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List refQuals, List altQuals) { // equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ? @@ -57,8 +57,6 @@ public class BaseQualityRankSumTest extends RankSumTest { refQuals.add(-10.0*refLikelihood); else if (altLikelihood > refLikelihood + INDEL_LIKELIHOOD_THRESH) altQuals.add(-10.0*altLikelihood); - - } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index 0dda02421..987579ab8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -205,7 +205,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat for ( Map.Entry sample : stratifiedContexts.entrySet() ) { for (PileupElement p : sample.getValue().getBasePileup()) { - if ( p.isDeletion() || p.isReducedRead() ) // ignore deletions and reduced reads + if ( p.isDeletion() || p.getRead().isReducedRead() ) // ignore deletions and reduced reads continue; if ( p.getRead().getMappingQuality() < 20 || p.getQual() < 20 ) @@ -258,7 +258,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat continue; for (final PileupElement p: pileup) { - if ( p.isReducedRead() ) // ignore reduced reads + if ( p.getRead().isReducedRead() ) // ignore reduced reads continue; if ( p.getRead().getMappingQuality() < 20) continue; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index 551f8e2cf..40b5aa4d5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -43,6 +42,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -62,15 +62,15 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot private final static char REGEXP_WILDCARD = '.'; public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { - if (stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here + if (stratifiedContexts.size() == 0) // size 0 means that call was made by someone else and we have no data here return null; - if (vc.isSNP() && !vc.isBiallelic()) + if (vc.isSNP() && !vc.isBiallelic()) return null; final AlignmentContext context = AlignmentContextUtils.joinContexts(stratifiedContexts.values()); - final int contextWingSize = Math.min(((int)ref.getWindow().size() - 1)/2, MIN_CONTEXT_WING_SIZE); + final int contextWingSize = Math.min(((int) ref.getWindow().size() - 1) / 2, MIN_CONTEXT_WING_SIZE); final int contextSize = contextWingSize * 2 + 1; final int locus = ref.getLocus().getStart() + (ref.getLocus().getStop() - ref.getLocus().getStart()) / 2; @@ -84,14 +84,14 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot if (pileup == null) return null; - + final List haplotypes = computeHaplotypes(pileup, contextSize, locus, vc); - final MathUtils.RunningAverage scoreRA = new MathUtils.RunningAverage(); + final MathUtils.RunningAverage scoreRA = new MathUtils.RunningAverage(); if (haplotypes != null) { - for ( final Genotype genotype : vc.getGenotypes()) { + for (final Genotype genotype : vc.getGenotypes()) { final AlignmentContext thisContext = stratifiedContexts.get(genotype.getSampleName()); - if ( thisContext != null ) { + if (thisContext != null) { final ReadBackedPileup thisPileup; if (thisContext.hasExtendedEventPileup()) thisPileup = thisContext.getExtendedEventPileup(); @@ -102,14 +102,13 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot if (thisPileup != null) { if (vc.isSNP()) - scoreRA.add( scoreReadsAgainstHaplotypes(haplotypes, thisPileup, contextSize, locus) ); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense + scoreRA.add(scoreReadsAgainstHaplotypes(haplotypes, thisPileup, contextSize, locus)); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense else if (vc.isIndel() || vc.isMixed()) { Double d = scoreIndelsAgainstHaplotypes(thisPileup); if (d == null) return null; - scoreRA.add( d ); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense - } - else + scoreRA.add(d); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense + } else return null; } } @@ -122,12 +121,12 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot return map; } - private class HaplotypeComparator implements Comparator{ + private class HaplotypeComparator implements Comparator { public int compare(Haplotype a, Haplotype b) { if (a.getQualitySum() < b.getQualitySum()) return 1; - if (a.getQualitySum() > b.getQualitySum()){ + if (a.getQualitySum() > b.getQualitySum()) { return -1; } return 0; @@ -137,39 +136,38 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot private List computeHaplotypes(final ReadBackedPileup pileup, final int contextSize, final int locus, final VariantContext vc) { // Compute all possible haplotypes consistent with current pileup - int haplotypesToCompute = vc.getAlternateAlleles().size()+1; + int haplotypesToCompute = vc.getAlternateAlleles().size() + 1; final PriorityQueue candidateHaplotypeQueue = new PriorityQueue(100, new HaplotypeComparator()); final PriorityQueue consensusHaplotypeQueue = new PriorityQueue(MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER, new HaplotypeComparator()); - for ( final PileupElement p : pileup ) { + for (final PileupElement p : pileup) { final Haplotype haplotypeFromRead = getHaplotypeFromRead(p, contextSize, locus); candidateHaplotypeQueue.add(haplotypeFromRead); } // Now that priority queue has been built with all reads at context, we need to merge and find possible segregating haplotypes Haplotype elem; - while ((elem = candidateHaplotypeQueue.poll()) != null) { + while ((elem = candidateHaplotypeQueue.poll()) != null) { boolean foundHaplotypeMatch = false; Haplotype lastCheckedHaplotype = null; - for ( final Haplotype haplotypeFromList : consensusHaplotypeQueue ) { + for (final Haplotype haplotypeFromList : consensusHaplotypeQueue) { final Haplotype consensusHaplotype = getConsensusHaplotype(elem, haplotypeFromList); - if (consensusHaplotype != null) { + if (consensusHaplotype != null) { foundHaplotypeMatch = true; if (consensusHaplotype.getQualitySum() > haplotypeFromList.getQualitySum()) { consensusHaplotypeQueue.remove(haplotypeFromList); consensusHaplotypeQueue.add(consensusHaplotype); } break; - } - else { + } else { lastCheckedHaplotype = haplotypeFromList; } } if (!foundHaplotypeMatch && consensusHaplotypeQueue.size() < MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER) { consensusHaplotypeQueue.add(elem); - } else if (!foundHaplotypeMatch && lastCheckedHaplotype != null && elem.getQualitySum() > lastCheckedHaplotype.getQualitySum() ) { + } else if (!foundHaplotypeMatch && lastCheckedHaplotype != null && elem.getQualitySum() > lastCheckedHaplotype.getQualitySum()) { consensusHaplotypeQueue.remove(lastCheckedHaplotype); consensusHaplotypeQueue.add(elem); } @@ -180,12 +178,14 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot // The consensus haplotypes are in a quality-ordered priority queue, so the best haplotypes are just the ones at the front of the queue final Haplotype haplotype1 = consensusHaplotypeQueue.poll(); - Listhlist = new ArrayList(); + List hlist = new ArrayList(); hlist.add(new Haplotype(haplotype1.getBases(), 60)); - for (int k=1; k < haplotypesToCompute; k++) { + for (int k = 1; k < haplotypesToCompute; k++) { Haplotype haplotype2 = consensusHaplotypeQueue.poll(); - if(haplotype2 == null ) { haplotype2 = haplotype1; } // Sometimes only the reference haplotype can be found + if (haplotype2 == null) { + haplotype2 = haplotype1; + } // Sometimes only the reference haplotype can be found hlist.add(new Haplotype(haplotype2.getBases(), 20)); } return hlist; @@ -194,36 +194,43 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot } private Haplotype getHaplotypeFromRead(final PileupElement p, final int contextSize, final int locus) { - final SAMRecord read = p.getRead(); + final GATKSAMRecord read = p.getRead(); int readOffsetFromPileup = p.getOffset(); final byte[] haplotypeBases = new byte[contextSize]; - Arrays.fill(haplotypeBases, (byte)REGEXP_WILDCARD); + Arrays.fill(haplotypeBases, (byte) REGEXP_WILDCARD); final double[] baseQualities = new double[contextSize]; Arrays.fill(baseQualities, 0.0); byte[] readBases = read.getReadBases(); - readBases = AlignmentUtils.readToAlignmentByteArray(p.getRead().getCigar(), readBases); // Adjust the read bases based on the Cigar string + readBases = AlignmentUtils.readToAlignmentByteArray(read.getCigar(), readBases); // Adjust the read bases based on the Cigar string byte[] readQuals = read.getBaseQualities(); - readQuals = AlignmentUtils.readToAlignmentByteArray(p.getRead().getCigar(), readQuals); // Shift the location of the qual scores based on the Cigar string + readQuals = AlignmentUtils.readToAlignmentByteArray(read.getCigar(), readQuals); // Shift the location of the qual scores based on the Cigar string - readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), readOffsetFromPileup, p.getRead().getAlignmentStart(), locus); - final int baseOffsetStart = readOffsetFromPileup - (contextSize - 1)/2; + readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(read.getCigar(), p, read.getAlignmentStart(), locus); + final int baseOffsetStart = readOffsetFromPileup - (contextSize - 1) / 2; - for (int i = 0; i < contextSize; i++ ) { + for (int i = 0; i < contextSize; i++) { final int baseOffset = i + baseOffsetStart; - if ( baseOffset < 0 ) { + if (baseOffset < 0) { continue; } - if ( baseOffset >= readBases.length ) { + if (baseOffset >= readBases.length) { break; } - if( readQuals[baseOffset] == PileupElement.DELETION_BASE) { readQuals[baseOffset] = PileupElement.DELETION_QUAL; } - if( !BaseUtils.isRegularBase(readBases[baseOffset]) ) { readBases[baseOffset] = (byte)REGEXP_WILDCARD; readQuals[baseOffset] = (byte) 0; } // N's shouldn't be treated as distinct bases - readQuals[baseOffset] = (byte)Math.min((int)readQuals[baseOffset], p.getMappingQual()); - if( ((int)readQuals[baseOffset]) < 5 ) { readQuals[baseOffset] = (byte) 0; } // quals less than 5 are used as codes and don't have actual probabilistic meaning behind them + if (readQuals[baseOffset] == PileupElement.DELETION_BASE) { + readQuals[baseOffset] = PileupElement.DELETION_QUAL; + } + if (!BaseUtils.isRegularBase(readBases[baseOffset])) { + readBases[baseOffset] = (byte) REGEXP_WILDCARD; + readQuals[baseOffset] = (byte) 0; + } // N's shouldn't be treated as distinct bases + readQuals[baseOffset] = (byte) Math.min((int) readQuals[baseOffset], p.getMappingQual()); + if (((int) readQuals[baseOffset]) < 5) { + readQuals[baseOffset] = (byte) 0; + } // quals less than 5 are used as codes and don't have actual probabilistic meaning behind them haplotypeBases[i] = readBases[baseOffset]; - baseQualities[i] = (double)readQuals[baseOffset]; + baseQualities[i] = (double) readQuals[baseOffset]; } return new Haplotype(haplotypeBases, baseQualities); @@ -238,7 +245,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot } byte chA, chB; - final byte wc = (byte)REGEXP_WILDCARD; + final byte wc = (byte) REGEXP_WILDCARD; final int length = a.length; final byte[] consensusChars = new byte[length]; @@ -247,7 +254,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot final double[] qualsA = haplotypeA.getQuals(); final double[] qualsB = haplotypeB.getQuals(); - for (int i=0; i < length; i++) { + for (int i = 0; i < length; i++) { chA = a[i]; chB = b[i]; @@ -257,17 +264,15 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot if ((chA == wc) && (chB == wc)) { consensusChars[i] = wc; consensusQuals[i] = 0.0; - } - else if ((chA == wc)) { + } else if ((chA == wc)) { consensusChars[i] = chB; consensusQuals[i] = qualsB[i]; - } - else if ((chB == wc)){ + } else if ((chB == wc)) { consensusChars[i] = chA; consensusQuals[i] = qualsA[i]; } else { consensusChars[i] = chA; - consensusQuals[i] = qualsA[i]+qualsB[i]; + consensusQuals[i] = qualsA[i] + qualsB[i]; } } @@ -276,31 +281,33 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot // calculate the haplotype scores by walking over all reads and comparing them to the haplotypes private double scoreReadsAgainstHaplotypes(final List haplotypes, final ReadBackedPileup pileup, final int contextSize, final int locus) { - if ( DEBUG ) System.out.printf("HAP1: %s%n", haplotypes.get(0)); - if ( DEBUG ) System.out.printf("HAP2: %s%n", haplotypes.get(1)); + if (DEBUG) System.out.printf("HAP1: %s%n", haplotypes.get(0)); + if (DEBUG) System.out.printf("HAP2: %s%n", haplotypes.get(1)); final ArrayList haplotypeScores = new ArrayList(); - for ( final PileupElement p : pileup ) { + for (final PileupElement p : pileup) { // Score all the reads in the pileup, even the filtered ones final double[] scores = new double[haplotypes.size()]; - for ( int i = 0; i < haplotypes.size(); i++ ) { + for (int i = 0; i < haplotypes.size(); i++) { final Haplotype haplotype = haplotypes.get(i); final double score = scoreReadAgainstHaplotype(p, contextSize, haplotype, locus); scores[i] = score; - if ( DEBUG ) { System.out.printf(" vs. haplotype %d = %f%n", i, score); } + if (DEBUG) { + System.out.printf(" vs. haplotype %d = %f%n", i, score); + } } haplotypeScores.add(scores); } double overallScore = 0.0; - for ( final double[] readHaplotypeScores : haplotypeScores ) { + for (final double[] readHaplotypeScores : haplotypeScores) { overallScore += MathUtils.arrayMin(readHaplotypeScores); } return overallScore; } - private double scoreReadAgainstHaplotype(final PileupElement p, final int contextSize, final Haplotype haplotype, final int locus ) { + private double scoreReadAgainstHaplotype(final PileupElement p, final int contextSize, final Haplotype haplotype, final int locus) { double expected = 0.0; double mismatches = 0.0; @@ -315,33 +322,35 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot // the chance that it is actually a mismatch is 1 - e, since any of the other 3 options would be a mismatch. // so the probability-weighted mismatch rate is sum_i ( matched ? e_i / 3 : 1 - e_i ) for i = 1 ... n final byte[] haplotypeBases = haplotype.getBases(); - final SAMRecord read = p.getRead(); + final GATKSAMRecord read = p.getRead(); byte[] readBases = read.getReadBases(); readBases = AlignmentUtils.readToAlignmentByteArray(p.getRead().getCigar(), readBases); // Adjust the read bases based on the Cigar string byte[] readQuals = read.getBaseQualities(); readQuals = AlignmentUtils.readToAlignmentByteArray(p.getRead().getCigar(), readQuals); // Shift the location of the qual scores based on the Cigar string int readOffsetFromPileup = p.getOffset(); - readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), readOffsetFromPileup, p.getRead().getAlignmentStart(), locus); - final int baseOffsetStart = readOffsetFromPileup - (contextSize - 1)/2; + readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, read.getAlignmentStart(), locus); + final int baseOffsetStart = readOffsetFromPileup - (contextSize - 1) / 2; - for ( int i = 0; i < contextSize; i++ ) { + for (int i = 0; i < contextSize; i++) { final int baseOffset = i + baseOffsetStart; - if ( baseOffset < 0 ) { + if (baseOffset < 0) { continue; } - if ( baseOffset >= readBases.length ) { + if (baseOffset >= readBases.length) { break; } final byte haplotypeBase = haplotypeBases[i]; final byte readBase = readBases[baseOffset]; - final boolean matched = ( readBase == haplotypeBase || haplotypeBase == (byte)REGEXP_WILDCARD ); + final boolean matched = (readBase == haplotypeBase || haplotypeBase == (byte) REGEXP_WILDCARD); byte qual = readQuals[baseOffset]; - if( qual == PileupElement.DELETION_BASE ) { qual = PileupElement.DELETION_QUAL; } // calcAlignmentByteArrayOffset fills the readQuals array with DELETION_BASE at deletions - qual = (byte)Math.min((int)qual, p.getMappingQual()); - if( ((int) qual) >= 5 ) { // quals less than 5 are used as codes and don't have actual probabilistic meaning behind them + if (qual == PileupElement.DELETION_BASE) { + qual = PileupElement.DELETION_QUAL; + } // calcAlignmentByteArrayOffset fills the readQuals array with DELETION_BASE at deletions + qual = (byte) Math.min((int) qual, p.getMappingQual()); + if (((int) qual) >= 5) { // quals less than 5 are used as codes and don't have actual probabilistic meaning behind them final double e = QualityUtils.qualToErrorProb(qual); expected += e; mismatches += matched ? e : 1.0 - e / 3.0; @@ -355,26 +364,27 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot } - private Double scoreIndelsAgainstHaplotypes(final ReadBackedPileup pileup) { final ArrayList haplotypeScores = new ArrayList(); - final HashMap> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(); + final HashMap> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(); - if (indelLikelihoodMap== null) + if (indelLikelihoodMap == null) return null; - for (final PileupElement p: pileup) { + for (final PileupElement p : pileup) { if (indelLikelihoodMap.containsKey(p)) { // retrieve likelihood information corresponding to this read - LinkedHashMap el = indelLikelihoodMap.get(p); + LinkedHashMap el = indelLikelihoodMap.get(p); // Score all the reads in the pileup, even the filtered ones final double[] scores = new double[el.size()]; int i = 0; - for (Allele a: el.keySet() ) { + for (Allele a : el.keySet()) { scores[i++] = -el.get(a); - if ( DEBUG ) { System.out.printf(" vs. haplotype %d = %f%n", i-1, scores[i-1]); } + if (DEBUG) { + System.out.printf(" vs. haplotype %d = %f%n", i - 1, scores[i - 1]); + } } haplotypeScores.add(scores); @@ -383,7 +393,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot // indel likelihoods are stric log-probs, not phred scored double overallScore = 0.0; - for ( final double[] readHaplotypeScores : haplotypeScores ) { + for (final double[] readHaplotypeScores : haplotypeScores) { overallScore += MathUtils.arrayMin(readHaplotypeScores); } @@ -392,6 +402,11 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot } - public List getKeyNames() { return Arrays.asList("HaplotypeScore"); } - public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("HaplotypeScore", 1, VCFHeaderLineType.Float, "Consistency of the site with at most two segregating haplotypes")); } + public List getKeyNames() { + return Arrays.asList("HaplotypeScore"); + } + + public List getDescriptions() { + return Arrays.asList(new VCFInfoHeaderLine("HaplotypeScore", 1, VCFHeaderLineType.Float, "Consistency of the site with at most two segregating haplotypes")); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index c5a2df1fd..e0e62cdb0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -30,11 +30,11 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar static final boolean DEBUG = false; public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { - if ( stratifiedContexts.size() == 0 ) + if (stratifiedContexts.size() == 0) return null; - + final GenotypesContext genotypes = vc.getGenotypes(); - if ( genotypes == null || genotypes.size() == 0 ) + if (genotypes == null || genotypes.size() == 0) return null; @@ -43,19 +43,18 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar if (vc.isSNP() && vc.isBiallelic()) { // todo - no current support for multiallelic snps - for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) { + for (final Genotype genotype : genotypes.iterateInSampleNameOrder()) { final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName()); - if ( context == null ) { + if (context == null) { continue; } fillQualsFromPileup(ref.getBase(), vc.getAlternateAllele(0).getBases()[0], context.getBasePileup(), refQuals, altQuals); } - } - else if (vc.isIndel() || vc.isMixed()) { + } else if (vc.isIndel() || vc.isMixed()) { - for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) { + for (final Genotype genotype : genotypes.iterateInSampleNameOrder()) { final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName()); - if ( context == null ) { + if (context == null) { continue; } @@ -74,46 +73,47 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar fillIndelQualsFromPileup(pileup, refQuals, altQuals); } - } - else + } else return null; final MannWhitneyU mannWhitneyU = new MannWhitneyU(); - for ( final Double qual : altQuals ) { + for (final Double qual : altQuals) { mannWhitneyU.add(qual, MannWhitneyU.USet.SET1); } - for ( final Double qual : refQuals ) { + for (final Double qual : refQuals) { mannWhitneyU.add(qual, MannWhitneyU.USet.SET2); } if (DEBUG) { - System.out.format("%s, REF QUALS:",this.getClass().getName()); - for ( final Double qual : refQuals ) - System.out.format("%4.1f ",qual); + System.out.format("%s, REF QUALS:", this.getClass().getName()); + for (final Double qual : refQuals) + System.out.format("%4.1f ", qual); System.out.println(); - System.out.format("%s, ALT QUALS:",this.getClass().getName()); - for ( final Double qual : altQuals ) - System.out.format("%4.1f ",qual); + System.out.format("%s, ALT QUALS:", this.getClass().getName()); + for (final Double qual : altQuals) + System.out.format("%4.1f ", qual); System.out.println(); } // we are testing that set1 (the alt bases) have lower quality scores than set2 (the ref bases) - final Pair testResults = mannWhitneyU.runOneSidedTest( MannWhitneyU.USet.SET1 ); + final Pair testResults = mannWhitneyU.runOneSidedTest(MannWhitneyU.USet.SET1); final Map map = new HashMap(); - if ( ! Double.isNaN(testResults.first) ) + if (!Double.isNaN(testResults.first)) map.put(getKeyNames().get(0), String.format("%.3f", testResults.first)); return map; } protected abstract void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List refQuals, List altQuals); + protected abstract void fillIndelQualsFromPileup(ReadBackedPileup pileup, List refQuals, List altQuals); - protected static boolean isUsableBase( final PileupElement p ) { - return !( p.isDeletion() || - p.getMappingQual() == 0 || - p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE || - ((int)p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE ); // need the unBAQed quality score here + protected static boolean isUsableBase(final PileupElement p) { + return !(p.isInsertionAtBeginningOfRead() || + p.isDeletion() || + p.getMappingQual() == 0 || + p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE || + ((int) p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE); // need the unBAQed quality score here } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java index d762af428..b0039d1a0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java @@ -24,27 +24,32 @@ import java.util.List; */ public class ReadPosRankSumTest extends RankSumTest { - public List getKeyNames() { return Arrays.asList("ReadPosRankSum"); } + public List getKeyNames() { + return Arrays.asList("ReadPosRankSum"); + } - public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("ReadPosRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias")); } + public List getDescriptions() { + return Arrays.asList(new VCFInfoHeaderLine("ReadPosRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias")); + } protected void fillQualsFromPileup(byte ref, byte alt, ReadBackedPileup pileup, List refQuals, List altQuals) { - for ( final PileupElement p : pileup ) { - if( isUsableBase(p) ) { - int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p.getOffset(), 0, 0); + for (final PileupElement p : pileup) { + if (isUsableBase(p)) { + int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, 0, 0); final int numAlignedBases = AlignmentUtils.getNumAlignedBases(p.getRead()); - if( readPos > numAlignedBases / 2 ) { - readPos = numAlignedBases - ( readPos + 1 ); - } + if (readPos > numAlignedBases / 2) + readPos = numAlignedBases - (readPos + 1); + + + if (p.getBase() == ref) + refQuals.add((double) readPos); + else if (p.getBase() == alt) + altQuals.add((double) readPos); - if ( p.getBase() == ref ) { - refQuals.add( (double)readPos ); - } else if ( p.getBase() == alt ) { - altQuals.add( (double)readPos ); - } } } } + protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List refQuals, List altQuals) { // equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele // to classify a pileup element as ref or alt, we look at the likelihood associated with the allele associated to this element. @@ -52,18 +57,15 @@ public class ReadPosRankSumTest extends RankSumTest { // To classify a pileup element as Ref or Alt, we look at the likelihood of corresponding alleles. // If likelihood of ref allele > highest likelihood of all alt alleles + epsilon, then this pielup element is "ref" // otherwise if highest alt allele likelihood is > ref likelihood + epsilon, then this pileup element it "alt" - final HashMap> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(); - for (final PileupElement p: pileup) { + final HashMap> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(); + for (final PileupElement p : pileup) { if (indelLikelihoodMap.containsKey(p)) { - // retrieve likelihood information corresponding to this read - LinkedHashMap el = indelLikelihoodMap.get(p); - // by design, first element in LinkedHashMap was ref allele - double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY; + LinkedHashMap el = indelLikelihoodMap.get(p); // retrieve likelihood information corresponding to this read + double refLikelihood = 0.0, altLikelihood = Double.NEGATIVE_INFINITY; // by design, first element in LinkedHashMap was ref allele for (Allele a : el.keySet()) { - if (a.isReference()) - refLikelihood =el.get(a); + refLikelihood = el.get(a); else { double like = el.get(a); if (like >= altLikelihood) @@ -75,23 +77,22 @@ public class ReadPosRankSumTest extends RankSumTest { final int numAlignedBases = getNumAlignedBases(p.getRead()); int rp = readPos; - if( readPos > numAlignedBases / 2 ) { - readPos = numAlignedBases - ( readPos + 1 ); + if (readPos > numAlignedBases / 2) { + readPos = numAlignedBases - (readPos + 1); } - //if (DEBUG) System.out.format("R:%s start:%d C:%s offset:%d rp:%d readPos:%d alignedB:%d\n",p.getRead().getReadName(),p.getRead().getAlignmentStart(),p.getRead().getCigarString(),p.getOffset(), rp, readPos, numAlignedBases); + //if (DEBUG) System.out.format("R:%s start:%d C:%s offset:%d rp:%d readPos:%d alignedB:%d\n",p.getRead().getReadName(),p.getRead().getAlignmentStart(),p.getRead().getCigarString(),p.getOffset(), rp, readPos, numAlignedBases); // if event is beyond span of read just return and don't consider this element. This can happen, for example, with reads // where soft clipping still left strings of low quality bases but these are later removed by indel-specific clipping. - // if (readPos < -1) + // if (readPos < -1) // return; - if (refLikelihood > (altLikelihood + INDEL_LIKELIHOOD_THRESH)) { - refQuals.add((double)readPos); + if (refLikelihood > (altLikelihood + INDEL_LIKELIHOOD_THRESH)) { + refQuals.add((double) readPos); //if (DEBUG) System.out.format("REF like: %4.1f, pos: %d\n",refLikelihood,readPos); - } - else if (altLikelihood > (refLikelihood + INDEL_LIKELIHOOD_THRESH)) { - altQuals.add((double)readPos); - //if (DEBUG) System.out.format("ALT like: %4.1f, pos: %d\n",refLikelihood,readPos); + } else if (altLikelihood > (refLikelihood + INDEL_LIKELIHOOD_THRESH)) { + altQuals.add((double) readPos); + //if (DEBUG) System.out.format("ALT like: %4.1f, pos: %d\n",refLikelihood,readPos); } @@ -115,7 +116,7 @@ public class ReadPosRankSumTest extends RankSumTest { // Do a stricter base clipping than provided by CIGAR string, since this one may be too conservative, // and may leave a string of Q2 bases still hanging off the reads. - for (int i=numStartClippedBases; i < unclippedReadBases.length; i++) { + for (int i = numStartClippedBases; i < unclippedReadBases.length; i++) { if (unclippedReadQuals[i] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD) numStartClippedBases++; else @@ -134,7 +135,7 @@ public class ReadPosRankSumTest extends RankSumTest { // compute total number of clipped bases (soft or hard clipped) // check for hard clips (never consider these bases): final Cigar c = read.getCigar(); - CigarElement last = c.getCigarElement(c.numCigarElements()-1); + CigarElement last = c.getCigarElement(c.numCigarElements() - 1); int numEndClippedBases = 0; if (last.getOperator() == CigarOperator.H) { @@ -145,7 +146,7 @@ public class ReadPosRankSumTest extends RankSumTest { // Do a stricter base clipping than provided by CIGAR string, since this one may be too conservative, // and may leave a string of Q2 bases still hanging off the reads. - for (int i=unclippedReadBases.length-numEndClippedBases-1; i >= 0; i-- ){ + for (int i = unclippedReadBases.length - numEndClippedBases - 1; i >= 0; i--) { if (unclippedReadQuals[i] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD) numEndClippedBases++; else @@ -157,8 +158,6 @@ public class ReadPosRankSumTest extends RankSumTest { } int getOffsetFromClippedReadStart(SAMRecord read, int offset) { - - - return offset - getNumClippedBasesAtStart(read); + return offset - getNumClippedBasesAtStart(read); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java index ae7077230..7143606ae 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java @@ -278,7 +278,7 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { if ( qual == 0 ) return 0; - if ( elt.isReducedRead() ) { + if ( elt.getRead().isReducedRead() ) { // reduced read representation if ( BaseUtils.isRegularBase( obsBase )) { int representativeCount = elt.getRepresentativeCount(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 0756caf03..9126c0495 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -60,14 +60,14 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood private final int maxAlternateAlleles; private PairHMMIndelErrorModel pairModel; - private static ThreadLocal>> indelLikelihoodMap = - new ThreadLocal>>() { - protected synchronized HashMap> initialValue() { - return new HashMap>(); + private static ThreadLocal>> indelLikelihoodMap = + new ThreadLocal>>() { + protected synchronized HashMap> initialValue() { + return new HashMap>(); } }; - private LinkedHashMap haplotypeMap; + private LinkedHashMap haplotypeMap; // gdebug removeme // todo -cleanup @@ -75,13 +75,13 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood private ArrayList alleleList; static { - indelLikelihoodMap.set(new HashMap>()); + indelLikelihoodMap.set(new HashMap>()); } protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { super(UAC, logger); - pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY, + pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY, UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION); alleleList = new ArrayList(); getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; @@ -91,7 +91,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood maxAlternateAlleles = UAC.MAX_ALTERNATE_ALLELES; doMultiAllelicCalls = UAC.MULTI_ALLELIC; - haplotypeMap = new LinkedHashMap(); + haplotypeMap = new LinkedHashMap(); ignoreSNPAllelesWhenGenotypingIndels = UAC.IGNORE_SNP_ALLELES; } @@ -99,15 +99,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood private ArrayList computeConsensusAlleles(ReferenceContext ref, Map contexts, AlignmentContextUtils.ReadOrientation contextType, GenomeLocParser locParser) { - Allele refAllele=null, altAllele=null; + Allele refAllele = null, altAllele = null; GenomeLoc loc = ref.getLocus(); ArrayList aList = new ArrayList(); - HashMap consensusIndelStrings = new HashMap(); + HashMap consensusIndelStrings = new HashMap(); int insCount = 0, delCount = 0; // quick check of total number of indels in pileup - for ( Map.Entry sample : contexts.entrySet() ) { + for (Map.Entry sample : contexts.entrySet()) { AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType); final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup(); @@ -118,21 +118,19 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping) return aList; - for ( Map.Entry sample : contexts.entrySet() ) { + for (Map.Entry sample : contexts.entrySet()) { // todo -- warning, can be duplicating expensive partition here AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType); final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup(); - - - for ( ExtendedEventPileupElement p : indelPileup.toExtendedIterable() ) { + for (ExtendedEventPileupElement p : indelPileup.toExtendedIterable()) { //SAMRecord read = p.getRead(); GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead()); if (read == null) continue; - if(ReadUtils.is454Read(read)) { + if (ReadUtils.is454Read(read)) { continue; } @@ -151,62 +149,57 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood // In this case, the read could have any of the inserted bases and we need to build a consensus for (String s : consensusIndelStrings.keySet()) { int cnt = consensusIndelStrings.get(s); - if (s.startsWith(indelString)){ + if (s.startsWith(indelString)) { // case 1: current insertion is prefix of indel in hash map - consensusIndelStrings.put(s,cnt+1); + consensusIndelStrings.put(s, cnt + 1); foundKey = true; break; - } - else if (indelString.startsWith(s)) { + } else if (indelString.startsWith(s)) { // case 2: indel stored in hash table is prefix of current insertion // In this case, new bases are new key. consensusIndelStrings.remove(s); - consensusIndelStrings.put(indelString,cnt+1); + consensusIndelStrings.put(indelString, cnt + 1); foundKey = true; break; } } if (!foundKey) // none of the above: event bases not supported by previous table, so add new key - consensusIndelStrings.put(indelString,1); + consensusIndelStrings.put(indelString, 1); - } - else if (read.getAlignmentStart() == loc.getStart()+1) { + } else if (read.getAlignmentStart() == loc.getStart() + 1) { // opposite corner condition: read will start at current locus with an insertion for (String s : consensusIndelStrings.keySet()) { int cnt = consensusIndelStrings.get(s); - if (s.endsWith(indelString)){ + if (s.endsWith(indelString)) { // case 1: current insertion is suffix of indel in hash map - consensusIndelStrings.put(s,cnt+1); + consensusIndelStrings.put(s, cnt + 1); foundKey = true; break; - } - else if (indelString.endsWith(s)) { + } else if (indelString.endsWith(s)) { // case 2: indel stored in hash table is suffix of current insertion // In this case, new bases are new key. consensusIndelStrings.remove(s); - consensusIndelStrings.put(indelString,cnt+1); + consensusIndelStrings.put(indelString, cnt + 1); foundKey = true; break; } } if (!foundKey) // none of the above: event bases not supported by previous table, so add new key - consensusIndelStrings.put(indelString,1); + consensusIndelStrings.put(indelString, 1); - } - else { + } else { // normal case: insertion somewhere in the middle of a read: add count to hash map - int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0; - consensusIndelStrings.put(indelString,cnt+1); + int cnt = consensusIndelStrings.containsKey(indelString) ? consensusIndelStrings.get(indelString) : 0; + consensusIndelStrings.put(indelString, cnt + 1); } - } - else if (p.isDeletion()) { - indelString = String.format("D%d",p.getEventLength()); - int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0; - consensusIndelStrings.put(indelString,cnt+1); + } else if (p.isDeletion()) { + indelString = String.format("D%d", p.getEventLength()); + int cnt = consensusIndelStrings.containsKey(indelString) ? consensusIndelStrings.get(indelString) : 0; + consensusIndelStrings.put(indelString, cnt + 1); } } @@ -227,18 +220,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood // get deletion length int dLen = Integer.valueOf(s.substring(1)); // get ref bases of accurate deletion - int startIdxInReference = 1+loc.getStart()-ref.getWindow().getStart(); + int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart(); stop = loc.getStart() + dLen; - byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen); + byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen); if (Allele.acceptableAlleleBases(refBases)) { - refAllele = Allele.create(refBases,true); + refAllele = Allele.create(refBases, true); altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); } - } - else { + } else { // insertion case - if (Allele.acceptableAlleleBases(s)) { + if (Allele.acceptableAlleleBases(s)) { refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); altAllele = Allele.create(s, false); stop = loc.getStart(); @@ -288,7 +280,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood Allele alternateAlleleToUse, boolean useBAQedPileup, GenomeLocParser locParser) { - if ( tracker == null ) + if (tracker == null) return null; GenomeLoc loc = ref.getLocus(); @@ -299,12 +291,12 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood // starting a new site: clear allele list alleleList.clear(); lastSiteVisited = ref.getLocus(); - indelLikelihoodMap.set(new HashMap>()); + indelLikelihoodMap.set(new HashMap>()); haplotypeMap.clear(); if (getAlleleListFromVCF) { - for( final VariantContext vc_input : tracker.getValues(UAC.alleles, loc) ) { - if( vc_input != null && + for (final VariantContext vc_input : tracker.getValues(UAC.alleles, loc)) { + if (vc_input != null && allowableTypes.contains(vc_input.getType()) && ref.getLocus().getStart() == vc_input.getStart()) { vc = vc_input; @@ -312,7 +304,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } } // ignore places where we don't have a variant - if ( vc == null ) + if (vc == null) return null; alleleList.clear(); @@ -324,15 +316,13 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood else alleleList.add(a); - } - else { + } else { for (Allele a : vc.getAlleles()) alleleList.add(a); } - } - else { - alleleList = computeConsensusAlleles(ref,contexts, contextType, locParser); + } else { + alleleList = computeConsensusAlleles(ref, contexts, contextType, locParser); if (alleleList.isEmpty()) return null; } @@ -342,9 +332,9 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood return null; // check if there is enough reference window to create haplotypes (can be an issue at end of contigs) - if (ref.getWindow().getStop() < loc.getStop()+HAPLOTYPE_SIZE) + if (ref.getWindow().getStop() < loc.getStop() + HAPLOTYPE_SIZE) return null; - if ( !(priors instanceof DiploidIndelGenotypePriors) ) + if (!(priors instanceof DiploidIndelGenotypePriors)) throw new StingException("Only diploid-based Indel priors are supported in the DINDEL GL model"); if (alleleList.isEmpty()) @@ -355,8 +345,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood // look for alt allele that has biggest length distance to ref allele int maxLenDiff = 0; - for (Allele a: alleleList) { - if(a.isNonReference()) { + for (Allele a : alleleList) { + if (a.isNonReference()) { int lenDiff = Math.abs(a.getBaseString().length() - refAllele.getBaseString().length()); if (lenDiff > maxLenDiff) { maxLenDiff = lenDiff; @@ -366,11 +356,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } final int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length(); - final int hsize = (int)ref.getWindow().size()-Math.abs(eventLength)-1; - final int numPrefBases= ref.getLocus().getStart()-ref.getWindow().getStart()+1; + final int hsize = (int) ref.getWindow().size() - Math.abs(eventLength) - 1; + final int numPrefBases = ref.getLocus().getStart() - ref.getWindow().getStart() + 1; - if (hsize <=0) { - logger.warn(String.format("Warning: event at location %s can't be genotyped, skipping",loc.toString())); + if (hsize <= 0) { + logger.warn(String.format("Warning: event at location %s can't be genotyped, skipping", loc.toString())); return null; } haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(), @@ -388,7 +378,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood // For each sample, get genotype likelihoods based on pileup // compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them. - for ( Map.Entry sample : contexts.entrySet() ) { + for (Map.Entry sample : contexts.entrySet()) { AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType); ReadBackedPileup pileup = null; @@ -397,8 +387,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood else if (context.hasBasePileup()) pileup = context.getBasePileup(); - if (pileup != null ) { - final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap()); + if (pileup != null) { + final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap()); GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(genotypeLikelihoods); HashMap attributes = new HashMap(); @@ -407,9 +397,9 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false)); if (DEBUG) { - System.out.format("Sample:%s Alleles:%s GL:",sample.getKey(), alleleList.toString()); - for (int k=0; k < genotypeLikelihoods.length; k++) - System.out.format("%1.4f ",genotypeLikelihoods[k]); + System.out.format("Sample:%s Alleles:%s GL:", sample.getKey(), alleleList.toString()); + for (int k = 0; k < genotypeLikelihoods.length; k++) + System.out.format("%1.4f ", genotypeLikelihoods[k]); System.out.println(); } } @@ -421,21 +411,21 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood private int calculateEndPos(Collection alleles, Allele refAllele, GenomeLoc loc) { // for indels, stop location is one more than ref allele length boolean hasNullAltAllele = false; - for ( Allele a : alleles ) { - if ( a.isNull() ) { + for (Allele a : alleles) { + if (a.isNull()) { hasNullAltAllele = true; break; } } int endLoc = loc.getStart() + refAllele.length(); - if( !hasNullAltAllele ) + if (!hasNullAltAllele) endLoc--; return endLoc; } - public static HashMap> getIndelLikelihoodMap() { + public static HashMap> getIndelLikelihoodMap() { return indelLikelihoodMap.get(); } @@ -443,8 +433,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood // so that per-sample DP will include deletions covering the event. protected int getFilteredDepth(ReadBackedPileup pileup) { int count = 0; - for ( PileupElement p : pileup ) { - if (p.isDeletion() || BaseUtils.isRegularBase(p.getBase()) ) + for (PileupElement p : pileup) { + if (p.isDeletion() || p.isInsertionAtBeginningOfRead() || BaseUtils.isRegularBase(p.getBase())) count++; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 81c766e4d..d9ee2ba1b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -212,7 +212,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC public class BAQedPileupElement extends PileupElement { public BAQedPileupElement( final PileupElement PE ) { - super(PE.getRead(), PE.getOffset()); + super(PE.getRead(), PE.getOffset(), PE.isDeletion()); } @Override diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java index 586b86490..1fa7101ca 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java @@ -40,7 +40,7 @@ import java.util.*; * @author mhanna * @version 0.1 */ -public abstract class AbstractReadBackedPileup,PE extends PileupElement> implements ReadBackedPileup { +public abstract class AbstractReadBackedPileup, PE extends PileupElement> implements ReadBackedPileup { protected final GenomeLoc loc; protected final PileupElementTracker pileupElementTracker; @@ -55,23 +55,18 @@ public abstract class AbstractReadBackedPileup reads, List offsets ) { + public AbstractReadBackedPileup(GenomeLoc loc, List reads, List offsets) { this.loc = loc; - this.pileupElementTracker = readsOffsets2Pileup(reads,offsets); + this.pileupElementTracker = readsOffsets2Pileup(reads, offsets); } - public AbstractReadBackedPileup(GenomeLoc loc, List reads, int offset ) { - this.loc = loc; - this.pileupElementTracker = readsOffsets2Pileup(reads,offset); - } /** * Create a new version of a read backed pileup at loc without any aligned reads - * */ public AbstractReadBackedPileup(GenomeLoc loc) { this(loc, new UnifiedPileupElementTracker()); @@ -81,11 +76,10 @@ public abstract class AbstractReadBackedPileup pileup) { - if ( loc == null ) throw new ReviewedStingException("Illegal null genomeloc in ReadBackedPileup"); - if ( pileup == null ) throw new ReviewedStingException("Illegal null pileup in ReadBackedPileup"); + if (loc == null) throw new ReviewedStingException("Illegal null genomeloc in ReadBackedPileup"); + if (pileup == null) throw new ReviewedStingException("Illegal null pileup in ReadBackedPileup"); this.loc = loc; this.pileupElementTracker = new UnifiedPileupElementTracker(pileup); @@ -94,12 +88,13 @@ public abstract class AbstractReadBackedPileup pileup, int size, int nDeletions, int nMQ0Reads) { - if ( loc == null ) throw new ReviewedStingException("Illegal null genomeloc in UnifiedReadBackedPileup"); - if ( pileup == null ) throw new ReviewedStingException("Illegal null pileup in UnifiedReadBackedPileup"); + if (loc == null) throw new ReviewedStingException("Illegal null genomeloc in UnifiedReadBackedPileup"); + if (pileup == null) throw new ReviewedStingException("Illegal null pileup in UnifiedReadBackedPileup"); this.loc = loc; this.pileupElementTracker = new UnifiedPileupElementTracker(pileup); @@ -115,16 +110,21 @@ public abstract class AbstractReadBackedPileup> pileupsBySample) { + protected AbstractReadBackedPileup(GenomeLoc loc, Map> pileupsBySample) { this.loc = loc; PerSamplePileupElementTracker tracker = new PerSamplePileupElementTracker(); - for(Map.Entry> pileupEntry: pileupsBySample.entrySet()) { - tracker.addElements(pileupEntry.getKey(),pileupEntry.getValue().pileupElementTracker); + for (Map.Entry> pileupEntry : pileupsBySample.entrySet()) { + tracker.addElements(pileupEntry.getKey(), pileupEntry.getValue().pileupElementTracker); addPileupToCumulativeStats(pileupEntry.getValue()); } this.pileupElementTracker = tracker; } + public AbstractReadBackedPileup(GenomeLoc loc, List reads, int offset) { + this.loc = loc; + this.pileupElementTracker = readsOffsets2Pileup(reads, offset); + } + /** * Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront, * so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling @@ -135,12 +135,12 @@ public abstract class AbstractReadBackedPileup pileup) { + protected void addPileupToCumulativeStats(AbstractReadBackedPileup pileup) { size += pileup.getNumberOfElements(); abstractSize += pileup.depthOfCoverage(); nDeletions += pileup.getNumberOfDeletions(); @@ -167,14 +167,17 @@ public abstract class AbstractReadBackedPileup readsOffsets2Pileup(List reads, List offsets ) { - if ( reads == null ) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup"); - if ( offsets == null ) throw new ReviewedStingException("Illegal null offsets list in UnifiedReadBackedPileup"); - if ( reads.size() != offsets.size() ) throw new ReviewedStingException("Reads and offset lists have different sizes!"); + private PileupElementTracker readsOffsets2Pileup(List reads, List offsets) { + if (reads == null) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup"); + if (offsets == null) throw new ReviewedStingException("Illegal null offsets list in UnifiedReadBackedPileup"); + if (reads.size() != offsets.size()) + throw new ReviewedStingException("Reads and offset lists have different sizes!"); UnifiedPileupElementTracker pileup = new UnifiedPileupElementTracker(); - for ( int i = 0; i < reads.size(); i++ ) { - pileup.add(createNewPileupElement(reads.get(i),offsets.get(i))); + for (int i = 0; i < reads.size(); i++) { + GATKSAMRecord read = reads.get(i); + int offset = offsets.get(i); + pileup.add(createNewPileupElement(read, offset, BaseUtils.simpleBaseToBaseIndex(read.getReadBases()[offset]) == BaseUtils.D)); } return pileup; @@ -187,20 +190,21 @@ public abstract class AbstractReadBackedPileup readsOffsets2Pileup(List reads, int offset ) { - if ( reads == null ) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup"); - if ( offset < 0 ) throw new ReviewedStingException("Illegal offset < 0 UnifiedReadBackedPileup"); + private PileupElementTracker readsOffsets2Pileup(List reads, int offset) { + if (reads == null) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup"); + if (offset < 0) throw new ReviewedStingException("Illegal offset < 0 UnifiedReadBackedPileup"); UnifiedPileupElementTracker pileup = new UnifiedPileupElementTracker(); - for ( int i = 0; i < reads.size(); i++ ) { - pileup.add(createNewPileupElement( reads.get(i), offset )); + for (GATKSAMRecord read : reads) { + pileup.add(createNewPileupElement(read, offset, BaseUtils.simpleBaseToBaseIndex(read.getReadBases()[offset]) == BaseUtils.D)); } return pileup; } - protected abstract AbstractReadBackedPileup createNewPileup(GenomeLoc loc, PileupElementTracker pileupElementTracker); - protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset); + protected abstract AbstractReadBackedPileup createNewPileup(GenomeLoc loc, PileupElementTracker pileupElementTracker); + + protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion); // -------------------------------------------------------- // @@ -217,32 +221,31 @@ public abstract class AbstractReadBackedPileup 0 ) { - if(pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + if (getNumberOfDeletions() > 0) { + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - for(final String sample: tracker.getSamples()) { + for (final String sample : tracker.getSamples()) { PileupElementTracker perSampleElements = tracker.getElements(sample); - AbstractReadBackedPileup pileup = createNewPileup(loc,perSampleElements).getPileupWithoutDeletions(); - filteredTracker.addElements(sample,pileup.pileupElementTracker); + AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getPileupWithoutDeletions(); + filteredTracker.addElements(sample, pileup.pileupElementTracker); } - return (RBP)createNewPileup(loc,filteredTracker); + return (RBP) createNewPileup(loc, filteredTracker); - } - else { - UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker)pileupElementTracker; + } else { + UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker; UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - for ( PE p : tracker ) { - if ( !p.isDeletion() ) { + for (PE p : tracker) { + if (!p.isDeletion()) { filteredTracker.add(p); } } - return (RBP)createNewPileup(loc, filteredTracker); + return (RBP) createNewPileup(loc, filteredTracker); } } else { - return (RBP)this; + return (RBP) this; } } @@ -256,21 +259,20 @@ public abstract class AbstractReadBackedPileup tracker = (PerSamplePileupElementTracker)pileupElementTracker; + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - for(final String sample: tracker.getSamples()) { + for (final String sample : tracker.getSamples()) { PileupElementTracker perSampleElements = tracker.getElements(sample); - AbstractReadBackedPileup pileup = createNewPileup(loc,perSampleElements).getOverlappingFragmentFilteredPileup(); - filteredTracker.addElements(sample,pileup.pileupElementTracker); + AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getOverlappingFragmentFilteredPileup(); + filteredTracker.addElements(sample, pileup.pileupElementTracker); } - return (RBP)createNewPileup(loc,filteredTracker); - } - else { - Map filteredPileup = new HashMap(); + return (RBP) createNewPileup(loc, filteredTracker); + } else { + Map filteredPileup = new HashMap(); - for ( PE p : pileupElementTracker ) { + for (PE p : pileupElementTracker) { String readName = p.getRead().getReadName(); // if we've never seen this read before, life is good @@ -292,10 +294,10 @@ public abstract class AbstractReadBackedPileup filteredTracker = new UnifiedPileupElementTracker(); - for(PE filteredElement: filteredPileup.values()) + for (PE filteredElement : filteredPileup.values()) filteredTracker.add(filteredElement); - return (RBP)createNewPileup(loc,filteredTracker); + return (RBP) createNewPileup(loc, filteredTracker); } } @@ -309,300 +311,299 @@ public abstract class AbstractReadBackedPileup 0 ) { - if(pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + if (getNumberOfMappingQualityZeroReads() > 0) { + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - for(final String sample: tracker.getSamples()) { + for (final String sample : tracker.getSamples()) { PileupElementTracker perSampleElements = tracker.getElements(sample); - AbstractReadBackedPileup pileup = createNewPileup(loc,perSampleElements).getPileupWithoutMappingQualityZeroReads(); - filteredTracker.addElements(sample,pileup.pileupElementTracker); + AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getPileupWithoutMappingQualityZeroReads(); + filteredTracker.addElements(sample, pileup.pileupElementTracker); } - return (RBP)createNewPileup(loc,filteredTracker); + return (RBP) createNewPileup(loc, filteredTracker); - } - else { - UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker)pileupElementTracker; + } else { + UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker; UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - for ( PE p : tracker ) { - if ( p.getRead().getMappingQuality() > 0 ) { + for (PE p : tracker) { + if (p.getRead().getMappingQuality() > 0) { filteredTracker.add(p); } } - return (RBP)createNewPileup(loc, filteredTracker); + return (RBP) createNewPileup(loc, filteredTracker); } } else { - return (RBP)this; + return (RBP) this; } } public RBP getPositiveStrandPileup() { - if(pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - for(final String sample: tracker.getSamples()) { + for (final String sample : tracker.getSamples()) { PileupElementTracker perSampleElements = tracker.getElements(sample); - AbstractReadBackedPileup pileup = createNewPileup(loc,perSampleElements).getPositiveStrandPileup(); - filteredTracker.addElements(sample,pileup.pileupElementTracker); + AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getPositiveStrandPileup(); + filteredTracker.addElements(sample, pileup.pileupElementTracker); } - return (RBP)createNewPileup(loc,filteredTracker); - } - else { - UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker)pileupElementTracker; + return (RBP) createNewPileup(loc, filteredTracker); + } else { + UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker; UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - for ( PE p : tracker ) { - if ( !p.getRead().getReadNegativeStrandFlag() ) { + for (PE p : tracker) { + if (!p.getRead().getReadNegativeStrandFlag()) { filteredTracker.add(p); } } - return (RBP)createNewPileup(loc, filteredTracker); + return (RBP) createNewPileup(loc, filteredTracker); } } /** * Gets the pileup consisting of only reads on the negative strand. + * * @return A read-backed pileup consisting only of reads on the negative strand. */ public RBP getNegativeStrandPileup() { - if(pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - for(final String sample: tracker.getSamples()) { + for (final String sample : tracker.getSamples()) { PileupElementTracker perSampleElements = tracker.getElements(sample); - AbstractReadBackedPileup pileup = createNewPileup(loc,perSampleElements).getNegativeStrandPileup(); - filteredTracker.addElements(sample,pileup.pileupElementTracker); + AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getNegativeStrandPileup(); + filteredTracker.addElements(sample, pileup.pileupElementTracker); } - return (RBP)createNewPileup(loc,filteredTracker); - } - else { - UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker)pileupElementTracker; + return (RBP) createNewPileup(loc, filteredTracker); + } else { + UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker; UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - for ( PE p : tracker ) { - if ( p.getRead().getReadNegativeStrandFlag() ) { + for (PE p : tracker) { + if (p.getRead().getReadNegativeStrandFlag()) { filteredTracker.add(p); } } - return (RBP)createNewPileup(loc, filteredTracker); + return (RBP) createNewPileup(loc, filteredTracker); } } /** * Gets a pileup consisting of all those elements passed by a given filter. + * * @param filter Filter to use when testing for elements. * @return a pileup without the given filtered elements. */ public RBP getFilteredPileup(PileupElementFilter filter) { - if(pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - for(final String sample: tracker.getSamples()) { + for (final String sample : tracker.getSamples()) { PileupElementTracker perSampleElements = tracker.getElements(sample); - AbstractReadBackedPileup pileup = createNewPileup(loc,perSampleElements).getFilteredPileup(filter); - filteredTracker.addElements(sample,pileup.pileupElementTracker); + AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getFilteredPileup(filter); + filteredTracker.addElements(sample, pileup.pileupElementTracker); } - return (RBP)createNewPileup(loc,filteredTracker); - } - else { + return (RBP) createNewPileup(loc, filteredTracker); + } else { UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - for ( PE p : pileupElementTracker ) { - if( filter.allow(p) ) + for (PE p : pileupElementTracker) { + if (filter.allow(p)) filteredTracker.add(p); } - return (RBP)createNewPileup(loc, filteredTracker); + return (RBP) createNewPileup(loc, filteredTracker); } } - /** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from + /** + * Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from * reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup. + * * @param minBaseQ * @param minMapQ * @return */ @Override - public RBP getBaseAndMappingFilteredPileup( int minBaseQ, int minMapQ ) { - if(pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + public RBP getBaseAndMappingFilteredPileup(int minBaseQ, int minMapQ) { + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - for(final String sample: tracker.getSamples()) { + for (final String sample : tracker.getSamples()) { PileupElementTracker perSampleElements = tracker.getElements(sample); - AbstractReadBackedPileup pileup = createNewPileup(loc,perSampleElements).getBaseAndMappingFilteredPileup(minBaseQ,minMapQ); - filteredTracker.addElements(sample,pileup.pileupElementTracker); + AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getBaseAndMappingFilteredPileup(minBaseQ, minMapQ); + filteredTracker.addElements(sample, pileup.pileupElementTracker); } - return (RBP)createNewPileup(loc,filteredTracker); - } - else { + return (RBP) createNewPileup(loc, filteredTracker); + } else { UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - for ( PE p : pileupElementTracker ) { - if ( p.getRead().getMappingQuality() >= minMapQ && + for (PE p : pileupElementTracker) { + if (p.getRead().getMappingQuality() >= minMapQ && (p.isDeletion() || - ((p instanceof ExtendedEventPileupElement) && ((ExtendedEventPileupElement)p).getType() == ExtendedEventPileupElement.Type.NOEVENT) || - p.getQual() >= minBaseQ) ) { + ((p instanceof ExtendedEventPileupElement) && ((ExtendedEventPileupElement) p).getType() == ExtendedEventPileupElement.Type.NOEVENT) || + p.getQual() >= minBaseQ)) { filteredTracker.add(p); } } - return (RBP)createNewPileup(loc, filteredTracker); + return (RBP) createNewPileup(loc, filteredTracker); } } - /** Returns subset of this pileup that contains only bases with quality >= minBaseQ. + /** + * Returns subset of this pileup that contains only bases with quality >= minBaseQ. * This method allocates and returns a new instance of ReadBackedPileup. + * * @param minBaseQ * @return */ @Override - public RBP getBaseFilteredPileup( int minBaseQ ) { + public RBP getBaseFilteredPileup(int minBaseQ) { return getBaseAndMappingFilteredPileup(minBaseQ, -1); } - /** Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ. + /** + * Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ. * This method allocates and returns a new instance of ReadBackedPileup. + * * @param minMapQ * @return */ @Override - public RBP getMappingFilteredPileup( int minMapQ ) { + public RBP getMappingFilteredPileup(int minMapQ) { return getBaseAndMappingFilteredPileup(-1, minMapQ); } /** * Gets a list of the read groups represented in this pileup. + * * @return */ @Override public Collection getReadGroups() { Set readGroups = new HashSet(); - for(PileupElement pileupElement: this) + for (PileupElement pileupElement : this) readGroups.add(pileupElement.getRead().getReadGroup().getReadGroupId()); return readGroups; } /** * Gets the pileup for a given read group. Horrendously inefficient at this point. + * * @param targetReadGroupId Identifier for the read group. * @return A read-backed pileup containing only the reads in the given read group. */ @Override public RBP getPileupForReadGroup(String targetReadGroupId) { - if(pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - for(final String sample: tracker.getSamples()) { + for (final String sample : tracker.getSamples()) { PileupElementTracker perSampleElements = tracker.getElements(sample); - AbstractReadBackedPileup pileup = createNewPileup(loc,perSampleElements).getPileupForReadGroup(targetReadGroupId); - if(pileup != null) - filteredTracker.addElements(sample,pileup.pileupElementTracker); + AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getPileupForReadGroup(targetReadGroupId); + if (pileup != null) + filteredTracker.addElements(sample, pileup.pileupElementTracker); } - return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null; - } - else { + return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null; + } else { UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - for(PE p: pileupElementTracker) { + for (PE p : pileupElementTracker) { GATKSAMRecord read = p.getRead(); - if(targetReadGroupId != null) { - if(read.getReadGroup() != null && targetReadGroupId.equals(read.getReadGroup().getReadGroupId())) + if (targetReadGroupId != null) { + if (read.getReadGroup() != null && targetReadGroupId.equals(read.getReadGroup().getReadGroupId())) filteredTracker.add(p); - } - else { - if(read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null) + } else { + if (read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null) filteredTracker.add(p); } } - return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null; + return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null; } } /** * Gets the pileup for a set of read groups. Horrendously inefficient at this point. + * * @param rgSet List of identifiers for the read groups. * @return A read-backed pileup containing only the reads in the given read groups. */ @Override public RBP getPileupForReadGroups(final HashSet rgSet) { - if(pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - for(final String sample: tracker.getSamples()) { + for (final String sample : tracker.getSamples()) { PileupElementTracker perSampleElements = tracker.getElements(sample); - AbstractReadBackedPileup pileup = createNewPileup(loc,perSampleElements).getPileupForReadGroups(rgSet); - if(pileup != null) - filteredTracker.addElements(sample,pileup.pileupElementTracker); + AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getPileupForReadGroups(rgSet); + if (pileup != null) + filteredTracker.addElements(sample, pileup.pileupElementTracker); } - return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null; - } - else { + return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null; + } else { UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - for(PE p: pileupElementTracker) { + for (PE p : pileupElementTracker) { GATKSAMRecord read = p.getRead(); - if(rgSet != null && !rgSet.isEmpty()) { - if(read.getReadGroup() != null && rgSet.contains(read.getReadGroup().getReadGroupId())) + if (rgSet != null && !rgSet.isEmpty()) { + if (read.getReadGroup() != null && rgSet.contains(read.getReadGroup().getReadGroupId())) filteredTracker.add(p); - } - else { - if(read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null) + } else { + if (read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null) filteredTracker.add(p); } } - return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null; + return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null; } } @Override public RBP getPileupForLane(String laneID) { - if(pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - for(final String sample: tracker.getSamples()) { + for (final String sample : tracker.getSamples()) { PileupElementTracker perSampleElements = tracker.getElements(sample); - AbstractReadBackedPileup pileup = createNewPileup(loc,perSampleElements).getPileupForLane(laneID); - if(pileup != null) - filteredTracker.addElements(sample,pileup.pileupElementTracker); + AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getPileupForLane(laneID); + if (pileup != null) + filteredTracker.addElements(sample, pileup.pileupElementTracker); } - return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null; - } - else { + return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null; + } else { UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - for(PE p: pileupElementTracker) { + for (PE p : pileupElementTracker) { GATKSAMRecord read = p.getRead(); - if(laneID != null) { - if(read.getReadGroup() != null && - (read.getReadGroup().getReadGroupId().startsWith(laneID + ".")) || // lane is the same, but sample identifier is different - (read.getReadGroup().getReadGroupId().equals(laneID))) // in case there is no sample identifier, they have to be exactly the same + if (laneID != null) { + if (read.getReadGroup() != null && + (read.getReadGroup().getReadGroupId().startsWith(laneID + ".")) || // lane is the same, but sample identifier is different + (read.getReadGroup().getReadGroupId().equals(laneID))) // in case there is no sample identifier, they have to be exactly the same filteredTracker.add(p); - } - else { - if(read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null) + } else { + if (read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null) filteredTracker.add(p); } } - return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null; + return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null; } } public Collection getSamples() { - if(pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; return new HashSet(tracker.getSamples()); - } - else { + } else { Collection sampleNames = new HashSet(); - for(PileupElement p: this) { + for (PileupElement p : this) { GATKSAMRecord read = p.getRead(); String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null; sampleNames.add(sampleName); @@ -619,103 +620,98 @@ public abstract class AbstractReadBackedPileup positions = new TreeSet(); - for ( int i = 0; i < desiredCoverage; /* no update */ ) { - if ( positions.add(GenomeAnalysisEngine.getRandomGenerator().nextInt(size)) ) + for (int i = 0; i < desiredCoverage; /* no update */) { + if (positions.add(GenomeAnalysisEngine.getRandomGenerator().nextInt(size))) i++; } - if(pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); int current = 0; - for(final String sample: tracker.getSamples()) { + for (final String sample : tracker.getSamples()) { PileupElementTracker perSampleElements = tracker.getElements(sample); List filteredPileup = new ArrayList(); - for(PileupElement p: perSampleElements) { - if(positions.contains(current)) + for (PileupElement p : perSampleElements) { + if (positions.contains(current)) filteredPileup.add(p); } - if(!filteredPileup.isEmpty()) { - AbstractReadBackedPileup pileup = createNewPileup(loc,perSampleElements); - filteredTracker.addElements(sample,pileup.pileupElementTracker); + if (!filteredPileup.isEmpty()) { + AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements); + filteredTracker.addElements(sample, pileup.pileupElementTracker); } current++; } - return (RBP)createNewPileup(loc,filteredTracker); - } - else { - UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker)pileupElementTracker; + return (RBP) createNewPileup(loc, filteredTracker); + } else { + UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker; UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); Iterator positionIter = positions.iterator(); - while ( positionIter.hasNext() ) { - int nextReadToKeep = (Integer)positionIter.next(); + while (positionIter.hasNext()) { + int nextReadToKeep = (Integer) positionIter.next(); filteredTracker.add(tracker.get(nextReadToKeep)); } - return (RBP)createNewPileup(getLocation(), filteredTracker); + return (RBP) createNewPileup(getLocation(), filteredTracker); } } @Override public RBP getPileupForSamples(Collection sampleNames) { - if(pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; PileupElementTracker filteredElements = tracker.getElements(sampleNames); - return filteredElements != null ? (RBP)createNewPileup(loc,filteredElements) : null; - } - else { + return filteredElements != null ? (RBP) createNewPileup(loc, filteredElements) : null; + } else { HashSet hashSampleNames = new HashSet(sampleNames); // to speed up the "contains" access in the for loop UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - for(PE p: pileupElementTracker) { + for (PE p : pileupElementTracker) { GATKSAMRecord read = p.getRead(); - if(sampleNames != null) { // still checking on sampleNames because hashSampleNames will never be null. And empty means something else. - if(read.getReadGroup() != null && hashSampleNames.contains(read.getReadGroup().getSample())) + if (sampleNames != null) { // still checking on sampleNames because hashSampleNames will never be null. And empty means something else. + if (read.getReadGroup() != null && hashSampleNames.contains(read.getReadGroup().getSample())) filteredTracker.add(p); - } - else { - if(read.getReadGroup() == null || read.getReadGroup().getSample() == null) + } else { + if (read.getReadGroup() == null || read.getReadGroup().getSample() == null) filteredTracker.add(p); } } - return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null; + return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null; } } @Override public RBP getPileupForSample(String sampleName) { - if(pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; PileupElementTracker filteredElements = tracker.getElements(sampleName); - return filteredElements != null ? (RBP)createNewPileup(loc,filteredElements) : null; - } - else { + return filteredElements != null ? (RBP) createNewPileup(loc, filteredElements) : null; + } else { UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - for(PE p: pileupElementTracker) { + for (PE p : pileupElementTracker) { GATKSAMRecord read = p.getRead(); - if(sampleName != null) { - if(read.getReadGroup() != null && sampleName.equals(read.getReadGroup().getSample())) + if (sampleName != null) { + if (read.getReadGroup() != null && sampleName.equals(read.getReadGroup().getSample())) filteredTracker.add(p); - } - else { - if(read.getReadGroup() == null || read.getReadGroup().getSample() == null) + } else { + if (read.getReadGroup() == null || read.getReadGroup().getSample() == null) filteredTracker.add(p); } } - return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null; + return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null; } } @@ -727,9 +723,9 @@ public abstract class AbstractReadBackedPileup * for (PileupElement p : this) { doSomething(p); } - * + *

* Provides efficient iteration of the data. * * @return @@ -739,9 +735,17 @@ public abstract class AbstractReadBackedPileup() { private final Iterator wrappedIterator = pileupElementTracker.iterator(); - public boolean hasNext() { return wrappedIterator.hasNext(); } - public PileupElement next() { return wrappedIterator.next(); } - public void remove() { throw new UnsupportedOperationException("Cannot remove from a pileup element iterator"); } + public boolean hasNext() { + return wrappedIterator.hasNext(); + } + + public PileupElement next() { + return wrappedIterator.next(); + } + + public void remove() { + throw new UnsupportedOperationException("Cannot remove from a pileup element iterator"); + } }; } @@ -784,7 +788,7 @@ public abstract class AbstractReadBackedPileup tracker = (PerSamplePileupElementTracker)pileupElementTracker; - for(final String sample: tracker.getSamples()) { - int[] countsBySample = createNewPileup(loc,tracker.getElements(sample)).getBaseCounts(); - for(int i = 0; i < counts.length; i++) + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; + for (final String sample : tracker.getSamples()) { + int[] countsBySample = createNewPileup(loc, tracker.getElements(sample)).getBaseCounts(); + for (int i = 0; i < counts.length; i++) counts[i] += countsBySample[i]; } - } - else { - for ( PileupElement pile : this ) { + } else { + for (PileupElement pile : this) { // skip deletion sites - if ( ! pile.isDeletion() ) { - int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase()); + if (!pile.isDeletion()) { + int index = BaseUtils.simpleBaseToBaseIndex((char) pile.getBase()); if (index != -1) counts[index]++; } @@ -857,65 +860,80 @@ public abstract class AbstractReadBackedPileup getReads() { List reads = new ArrayList(getNumberOfElements()); - for ( PileupElement pile : this ) { reads.add(pile.getRead()); } + for (PileupElement pile : this) { + reads.add(pile.getRead()); + } return reads; } /** * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time + * * @return */ @Override public List getOffsets() { List offsets = new ArrayList(getNumberOfElements()); - for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); } + for (PileupElement pile : this) { + offsets.add(pile.getOffset()); + } return offsets; } /** * Returns an array of the bases in this pileup. Note this call costs O(n) and allocates fresh array each time + * * @return */ @Override public byte[] getBases() { byte[] v = new byte[getNumberOfElements()]; int pos = 0; - for ( PileupElement pile : pileupElementTracker ) { v[pos++] = pile.getBase(); } + for (PileupElement pile : pileupElementTracker) { + v[pos++] = pile.getBase(); + } return v; } /** * Returns an array of the quals in this pileup. Note this call costs O(n) and allocates fresh array each time + * * @return */ @Override public byte[] getQuals() { byte[] v = new byte[getNumberOfElements()]; int pos = 0; - for ( PileupElement pile : pileupElementTracker ) { v[pos++] = pile.getQual(); } + for (PileupElement pile : pileupElementTracker) { + v[pos++] = pile.getQual(); + } return v; } /** * Get an array of the mapping qualities + * * @return */ @Override public byte[] getMappingQuals() { byte[] v = new byte[getNumberOfElements()]; int pos = 0; - for ( PileupElement pile : pileupElementTracker ) { v[pos++] = (byte)pile.getRead().getMappingQuality(); } + for (PileupElement pile : pileupElementTracker) { + v[pos++] = (byte) pile.getRead().getMappingQuality(); + } return v; } - static String quals2String( byte[] quals ) { + static String quals2String(byte[] quals) { StringBuilder qualStr = new StringBuilder(); - for ( int qual : quals ) { + for (int qual : quals) { qual = Math.min(qual, 63); // todo: fixme, this isn't a good idea char qualChar = (char) (33 + qual); // todo: warning, this is illegal for qual > 63 qualStr.append(qualChar); diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java index 1e5e4d4e5..1d7e6f636 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java @@ -12,7 +12,7 @@ import java.util.Arrays; * are seen on the base-by-base basis (i.e. the pileup does keep the information about the current reference base being deleted * in some reads), but the information about the extended event (deletion length, string of all deleted bases) is not kept. * The insertions that may be present in some reads are not seen at all in such strict reference traversal mode. - * + *

* By convention, any extended event (indel) is mapped onto the reference at the last base prior to the event (i.e. * last base before the insertion or deletion). If the special "extended" traversal mode is turned on and there is * an indel in at least one read that maps onto the reference position Z, the walker's map function will be called twice: @@ -22,9 +22,9 @@ import java.util.Arrays; * (covered) reference position. Note that if the extended event at Z was a deletion, the "standard" base pileup at * Z+1 and following bases may still contain deleted bases. However the fully extended event call will be performed * only once, at the position where the indel maps (starts). - * + *

* This class wraps an "extended" event (indel) so that in can be added to a pileup of events at a given location. - * + *

* Created by IntelliJ IDEA. * User: asivache * Date: Dec 21, 2009 @@ -39,40 +39,52 @@ public class ExtendedEventPileupElement extends PileupElement { private Type type = null; private int eventLength = -1; private String eventBases = null; // if it is a deletion, we do not have information about the actual deleted bases - // in the read itself, so we fill the string with D's; for insertions we keep actual inserted bases + // in the read itself, so we fill the string with D's; for insertions we keep actual inserted bases private SAMRecord read; private int offset; // position in the read immediately BEFORE the event // This is broken! offset is always zero because these member variables are shadowed by base class - /** Constructor for extended pileup element (indel). - * - * @param read the read, in which the indel is observed - * @param offset position in the read immediately before the indel (can be -1 if read starts with an insertion) - * @param length length of the indel (number of inserted or deleted bases); length <=0 indicates that the read has no indel (NOEVENT) - * @param eventBases inserted bases. null indicates that the event is a deletion; ignored if length<=0 (noevent) - */ - public ExtendedEventPileupElement( GATKSAMRecord read, int offset, int length, byte[] eventBases ) { - super(read, offset); - this.eventLength = length; - if ( length <= 0 ) type = Type.NOEVENT; - else { - if ( eventBases != null ) { - this.eventBases = new String(eventBases).toUpperCase(); - type = Type.INSERTION; - } else { - type = Type.DELETION; - } - } + + public ExtendedEventPileupElement(GATKSAMRecord read, int offset, int eventLength, String eventBases, Type type) { + super(read, offset, type == Type.DELETION); + this.read = read; + this.offset = offset; + this.eventLength = eventLength; + this.eventBases = eventBases; + this.type = type; } - /** Constructor for deletion or noevent calls - does not take event bases as an argument (as those should - * be null or are ignored in these cases anyway) - * @param read - * @param offset - * @param length + /** + * Quick constructor for insertions. + * + * @param read the read, in which the indel is observed + * @param offset position in the read immediately before the indel (can be -1 if read starts with an insertion) + * @param length length of the indel (number of inserted or deleted bases); length <=0 indicates that the read has no indel (NOEVENT) + * @param eventBases inserted bases. null indicates that the event is a deletion; ignored if length<=0 (noevent) */ - public ExtendedEventPileupElement( GATKSAMRecord read, int offset, int length ) { - this(read,offset, length, null); + public ExtendedEventPileupElement(GATKSAMRecord read, int offset, int length, byte[] eventBases) { + this(read, offset, length, new String(eventBases).toUpperCase(), Type.INSERTION); + } + + /** + * Quick constructor for non indels (matches) + * + * @param read the read + * @param offset where in the read the match is + */ + public ExtendedEventPileupElement(GATKSAMRecord read, int offset) { + this(read, offset, -1, null, Type.NOEVENT); + } + + /** + * Quick constructor for deletions + * + * @param read the read + * @param offset the last base before the deletion starts (left aligned deletion) + * @param length length of this deletion + */ + public ExtendedEventPileupElement(GATKSAMRecord read, int offset, int length) { + this(read, offset, length, null, Type.DELETION); } public boolean isDeletion() { @@ -87,46 +99,54 @@ public class ExtendedEventPileupElement extends PileupElement { return isDeletion() || isInsertion(); } - public Type getType() { return type; } + public Type getType() { + return type; + } // The offset can be negative with insertions at the start of the read, but a valid base does exist at this position with // a valid base quality. The following code attempts to compensate for that.' @Override public byte getBase() { - return getBase(offset >= 0 ? offset : offset+eventLength); + return getBase(offset >= 0 ? offset : offset + eventLength); } @Override public int getBaseIndex() { - return getBaseIndex(offset >= 0 ? offset : offset+eventLength); + return getBaseIndex(offset >= 0 ? offset : offset + eventLength); } @Override public byte getQual() { - return getQual(offset >= 0 ? offset : offset+eventLength); + return getQual(offset >= 0 ? offset : offset + eventLength); } - /** Returns length of the event (number of inserted or deleted bases */ - public int getEventLength() { return eventLength; } + /** + * Returns length of the event (number of inserted or deleted bases + */ + public int getEventLength() { + return eventLength; + } - /** Returns actual sequence of inserted bases, or a null if the event is a deletion or if there is no event in the associated read. - * */ - public String getEventBases() { return eventBases; } + /** + * Returns actual sequence of inserted bases, or a null if the event is a deletion or if there is no event in the associated read. + */ + public String getEventBases() { + return eventBases; + } @Override public String toString() { char c = '.'; - String fillStr = null ; - if ( isDeletion() ) { + String fillStr = null; + if (isDeletion()) { c = '-'; - char [] filler = new char[eventLength]; + char[] filler = new char[eventLength]; Arrays.fill(filler, 'D'); fillStr = new String(filler); - } - else if ( isInsertion() ) c = '+'; - return String.format("%s @ %d = %c%s MQ%d", getRead().getReadName(), getOffset(), c, isIndel()? - (isInsertion() ? eventBases : fillStr ): "", getMappingQual()); + } else if (isInsertion()) c = '+'; + return String.format("%s @ %d = %c%s MQ%d", getRead().getReadName(), getOffset(), c, isIndel() ? + (isInsertion() ? eventBases : fillStr) : "", getMappingQual()); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index 2d13d6e59..73f010d40 100755 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.pileup; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /** @@ -21,25 +22,61 @@ public class PileupElement implements Comparable { protected final GATKSAMRecord read; protected final int offset; + protected final boolean isDeletion; + + /** + * Creates a new pileup element. + * + * @param read the read we are adding to the pileup + * @param offset the position in the read for this base. All deletions must be left aligned! (-1 is only allowed for reads starting with insertions) + * @param isDeletion whether or not this base is a deletion + */ @Requires({ "read != null", "offset >= -1", "offset <= read.getReadLength()"}) - public PileupElement( GATKSAMRecord read, int offset ) { + public PileupElement(GATKSAMRecord read, int offset, boolean isDeletion) { + if (offset < 0 && isDeletion) + throw new ReviewedStingException("Pileup Element cannot create a deletion with a negative offset"); + this.read = read; this.offset = offset; + this.isDeletion = isDeletion; } + // /** +// * Creates a NON DELETION pileup element. +// * +// * use this constructor only for insertions and matches/mismatches. +// * @param read the read we are adding to the pileup +// * @param offset the position in the read for this base. All deletions must be left aligned! (-1 is only allowed for reads starting with insertions) +// */ +// @Requires({ +// "read != null", +// "offset >= -1", +// "offset <= read.getReadLength()"}) +// public PileupElement( GATKSAMRecord read, int offset ) { +// this(read, offset, false); +// } +// public boolean isDeletion() { + return isDeletion; + } + + public boolean isInsertionAtBeginningOfRead() { return offset == -1; } @Ensures("result != null") - public GATKSAMRecord getRead() { return read; } + public GATKSAMRecord getRead() { + return read; + } @Ensures("result == offset") - public int getOffset() { return offset; } + public int getOffset() { + return offset; + } public byte getBase() { return getBase(offset); @@ -59,30 +96,30 @@ public class PileupElement implements Comparable { @Ensures("result != null") public String toString() { - return String.format("%s @ %d = %c Q%d", getRead().getReadName(), getOffset(), (char)getBase(), getQual()); + return String.format("%s @ %d = %c Q%d", getRead().getReadName(), getOffset(), (char) getBase(), getQual()); } protected byte getBase(final int offset) { - return isDeletion() ? DELETION_BASE : read.getReadBases()[offset]; + return (isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_BASE : read.getReadBases()[offset]; } protected int getBaseIndex(final int offset) { - return BaseUtils.simpleBaseToBaseIndex(isDeletion() ? DELETION_BASE : read.getReadBases()[offset]); + return BaseUtils.simpleBaseToBaseIndex((isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_BASE : read.getReadBases()[offset]); } protected byte getQual(final int offset) { - return isDeletion() ? DELETION_QUAL : read.getBaseQualities()[offset]; + return (isDeletion() || isInsertionAtBeginningOfRead()) ? DELETION_QUAL : read.getBaseQualities()[offset]; } @Override public int compareTo(final PileupElement pileupElement) { - if ( offset < pileupElement.offset ) + if (offset < pileupElement.offset) return -1; - else if ( offset > pileupElement.offset ) + else if (offset > pileupElement.offset) return 1; - else if ( read.getAlignmentStart() < pileupElement.read.getAlignmentStart() ) + else if (read.getAlignmentStart() < pileupElement.read.getAlignmentStart()) return -1; - else if ( read.getAlignmentStart() > pileupElement.read.getAlignmentStart() ) + else if (read.getAlignmentStart() > pileupElement.read.getAlignmentStart()) return 1; else return 0; @@ -94,13 +131,26 @@ public class PileupElement implements Comparable { // // -------------------------------------------------------------------------- - public boolean isReducedRead() { - return read.isReducedRead(); - } +// public boolean isReducedRead() { +// return read.isReducedRead(); +// } + /** + * Returns the number of elements in the pileup element. + *

+ * Unless this is a reduced read, the number of elements in a pileup element is one. In the event of + * this being a reduced read and a deletion, we return the average number of elements between the left + * and right elements to the deletion. We assume the deletion to be left aligned. + * + * @return + */ public int getRepresentativeCount() { - // TODO -- if we ever decide to reduce the representation of deletions then this will need to be fixed - return (!isDeletion() && isReducedRead()) ? read.getReducedCount(offset) : 1; + int representativeCount = 1; + + if (read.isReducedRead() && !isInsertionAtBeginningOfRead()) + representativeCount = (isDeletion()) ? Math.round((read.getReducedCount(offset) + read.getReducedCount(offset + 1)) / 2) : read.getReducedCount(offset); + + return representativeCount; } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java index 43ad06352..bf67d1a70 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java @@ -30,33 +30,34 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.*; -public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup implements ReadBackedExtendedEventPileup { +public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup implements ReadBackedExtendedEventPileup { private int nInsertions; private int maxDeletionLength; // cached value of the length of the longest deletion observed at the site public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, List pileupElements) { - super(loc,pileupElements); + super(loc, pileupElements); } public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, PileupElementTracker tracker) { - super(loc,tracker); + super(loc, tracker); } /** * Optimization of above constructor where all of the cached data is provided + * * @param loc * @param pileup */ public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, List pileup, int size, - int maxDeletionLength, int nInsertions, int nDeletions, int nMQ0Reads) { - super(loc,pileup,size,nDeletions,nMQ0Reads); + int maxDeletionLength, int nInsertions, int nDeletions, int nMQ0Reads) { + super(loc, pileup, size, nDeletions, nMQ0Reads); this.maxDeletionLength = maxDeletionLength; this.nInsertions = nInsertions; } // this is the good new one - public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, Map pileupElementsBySample) { - super(loc,pileupElementsBySample); + public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, Map pileupElementsBySample) { + super(loc, pileupElementsBySample); } /** @@ -71,31 +72,31 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup< nInsertions = 0; nMQ0Reads = 0; - for ( ExtendedEventPileupElement p : this.toExtendedIterable() ) { + for (ExtendedEventPileupElement p : this.toExtendedIterable()) { - if ( p.isDeletion() ) { + if (p.isDeletion()) { maxDeletionLength = Math.max(maxDeletionLength, p.getEventLength()); } else { - if ( p.isInsertion() ) nInsertions++; + if (p.isInsertion()) nInsertions++; } } } @Override - protected void addPileupToCumulativeStats(AbstractReadBackedPileup pileup) { + protected void addPileupToCumulativeStats(AbstractReadBackedPileup pileup) { super.addPileupToCumulativeStats(pileup); - ReadBackedExtendedEventPileup extendedEventPileup = ((ReadBackedExtendedEventPileup)pileup); + ReadBackedExtendedEventPileup extendedEventPileup = ((ReadBackedExtendedEventPileup) pileup); this.nInsertions += extendedEventPileup.getNumberOfInsertions(); this.maxDeletionLength += extendedEventPileup.getMaxDeletionLength(); } @Override protected ReadBackedExtendedEventPileupImpl createNewPileup(GenomeLoc loc, PileupElementTracker tracker) { - return new ReadBackedExtendedEventPileupImpl(loc,tracker); + return new ReadBackedExtendedEventPileupImpl(loc, tracker); } @Override - protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset) { + protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion) { throw new UnsupportedOperationException("Not enough information provided to create a new pileup element"); } @@ -110,10 +111,12 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup< return nInsertions; } - /** Returns the length of the longest deletion observed at the site this + /** + * Returns the length of the longest deletion observed at the site this * pileup is associated with (NOTE: by convention, both insertions and deletions * are associated with genomic location immediately before the actual event). If * there are no deletions at the site, returns 0. + * * @return */ @Override @@ -123,36 +126,47 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup< public Iterable toExtendedIterable() { return new Iterable() { - public Iterator iterator() { return pileupElementTracker.iterator(); } + public Iterator iterator() { + return pileupElementTracker.iterator(); + } }; } /** * Returns an array of the events in this pileup ('I', 'D', or '.'). Note this call costs O(n) and allocates fresh array each time + * * @return */ @Override public byte[] getEvents() { byte[] v = new byte[getNumberOfElements()]; int i = 0; - for ( ExtendedEventPileupElement e : this.toExtendedIterable() ) { - switch ( e.getType() ) { - case INSERTION: v[i] = 'I'; break; - case DELETION: v[i] = 'D'; break; - case NOEVENT: v[i] = '.'; break; - default: throw new ReviewedStingException("Unknown event type encountered: "+e.getType()); + for (ExtendedEventPileupElement e : this.toExtendedIterable()) { + switch (e.getType()) { + case INSERTION: + v[i] = 'I'; + break; + case DELETION: + v[i] = 'D'; + break; + case NOEVENT: + v[i] = '.'; + break; + default: + throw new ReviewedStingException("Unknown event type encountered: " + e.getType()); } i++; } return v; - } + } - /** A shortcut for getEventStringsWithCounts(null); + /** + * A shortcut for getEventStringsWithCounts(null); * * @return */ @Override - public List> getEventStringsWithCounts() { + public List> getEventStringsWithCounts() { return getEventStringsWithCounts(null); } @@ -163,44 +177,48 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup< // insertion, deletion or no-event, respectively. return String.format("%s %s E %s", getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate - new String(getEvents()) ); + new String(getEvents())); } - /** Returns String representation of all distinct extended events (indels) at the site along with + /** + * Returns String representation of all distinct extended events (indels) at the site along with * observation counts (numbers of reads) for each distinct event. If refBases is null, a simple string representation for * deletions will be generated as "D" (i.e. "5D"); if the reference bases are provided, the actual * deleted sequence will be used in the string representation (e.g. "-AAC"). - * @param refBases reference bases, starting with the current locus (i.e. the one immediately before the indel), and - * extending far enough to accomodate the longest deletion (i.e. size of refBases must be at least 1+) + * + * @param refBases reference bases, starting with the current locus (i.e. the one immediately before the indel), and + * extending far enough to accomodate the longest deletion (i.e. size of refBases must be at least 1+) * @return list of distinct events; first element of a pair is a string representation of the event, second element - * gives the number of reads, in which that event was observed + * gives the number of reads, in which that event was observed */ @Override - public List> getEventStringsWithCounts(byte[] refBases) { - Map events = new HashMap(); + public List> getEventStringsWithCounts(byte[] refBases) { + Map events = new HashMap(); - for ( ExtendedEventPileupElement e : this.toExtendedIterable() ) { + for (ExtendedEventPileupElement e : this.toExtendedIterable()) { Integer cnt; String indel = null; - switch ( e.getType() ) { + switch (e.getType()) { case INSERTION: - indel = "+"+e.getEventBases(); + indel = "+" + e.getEventBases(); break; case DELETION: - indel = getDeletionString(e.getEventLength(),refBases); + indel = getDeletionString(e.getEventLength(), refBases); break; - case NOEVENT: continue; - default: throw new ReviewedStingException("Unknown event type encountered: "+e.getType()); + case NOEVENT: + continue; + default: + throw new ReviewedStingException("Unknown event type encountered: " + e.getType()); } cnt = events.get(indel); - if ( cnt == null ) events.put(indel,1); - else events.put(indel,cnt.intValue()+1); + if (cnt == null) events.put(indel, 1); + else events.put(indel, cnt.intValue() + 1); } - List> eventList = new ArrayList>(events.size()); - for ( Map.Entry m : events.entrySet() ) { - eventList.add( new Pair(m.getKey(),m.getValue())); + List> eventList = new ArrayList>(events.size()); + for (Map.Entry m : events.entrySet()) { + eventList.add(new Pair(m.getKey(), m.getValue())); } return eventList; } @@ -208,18 +226,19 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup< /** * Builds string representation of the deletion event. If refBases is null, the representation will be * "D" (e.g. "5D"); if the reference bases are provided, a verbose representation (e.g. "-AAC") - * will be generated. NOTE: refBases must start with the base prior to the actual deletion (i.e. deleted + * will be generated. NOTE: refBases must start with the base prior to the actual deletion (i.e. deleted * base(s) are refBase[1], refBase[2], ...), and the length of the passed array must be sufficient to accomodate the * deletion length (i.e. size of refBase must be at least length+1). + * * @param length * @param refBases * @return */ private String getDeletionString(int length, byte[] refBases) { - if ( refBases == null ) { - return Integer.toString(length)+"D"; // if we do not have reference bases, we can only report something like "5D" + if (refBases == null) { + return Integer.toString(length) + "D"; // if we do not have reference bases, we can only report something like "5D" } else { - return "-"+new String(refBases,1,length).toUpperCase(); + return "-" + new String(refBases, 1, length).toUpperCase(); } } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java index b7445be8d..66ddbe95d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java @@ -29,48 +29,49 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.List; import java.util.Map; -public class ReadBackedPileupImpl extends AbstractReadBackedPileup implements ReadBackedPileup { +public class ReadBackedPileupImpl extends AbstractReadBackedPileup implements ReadBackedPileup { public ReadBackedPileupImpl(GenomeLoc loc) { super(loc); } - public ReadBackedPileupImpl(GenomeLoc loc, List reads, List offsets ) { - super(loc,reads,offsets); + public ReadBackedPileupImpl(GenomeLoc loc, List reads, List offsets) { + super(loc, reads, offsets); } - public ReadBackedPileupImpl(GenomeLoc loc, List reads, int offset ) { - super(loc,reads,offset); + public ReadBackedPileupImpl(GenomeLoc loc, List reads, int offset) { + super(loc, reads, offset); } public ReadBackedPileupImpl(GenomeLoc loc, List pileupElements) { - super(loc,pileupElements); + super(loc, pileupElements); } - public ReadBackedPileupImpl(GenomeLoc loc, Map pileupElementsBySample) { - super(loc,pileupElementsBySample); + public ReadBackedPileupImpl(GenomeLoc loc, Map pileupElementsBySample) { + super(loc, pileupElementsBySample); } /** * Optimization of above constructor where all of the cached data is provided + * * @param loc * @param pileup */ public ReadBackedPileupImpl(GenomeLoc loc, List pileup, int size, int nDeletions, int nMQ0Reads) { - super(loc,pileup,size,nDeletions,nMQ0Reads); + super(loc, pileup, size, nDeletions, nMQ0Reads); } protected ReadBackedPileupImpl(GenomeLoc loc, PileupElementTracker tracker) { - super(loc,tracker); + super(loc, tracker); } @Override protected ReadBackedPileupImpl createNewPileup(GenomeLoc loc, PileupElementTracker tracker) { - return new ReadBackedPileupImpl(loc,tracker); + return new ReadBackedPileupImpl(loc, tracker); } @Override - protected PileupElement createNewPileupElement(GATKSAMRecord read, int offset) { - return new PileupElement(read,offset); + protected PileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion) { + return new PileupElement(read, offset, isDeletion); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index b8e892101..3b2736418 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -1,745 +1,774 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.sam; - -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.BitSet; - - -public class AlignmentUtils { - - public static class MismatchCount { - public int numMismatches = 0; - public long mismatchQualities = 0; - } - - public static long mismatchingQualities(SAMRecord r, byte[] refSeq, int refIndex) { - return getMismatchCount(r, refSeq, refIndex).mismatchQualities; - } - - public static MismatchCount getMismatchCount(SAMRecord r, byte[] refSeq, int refIndex) { - return getMismatchCount(r,refSeq,refIndex,0,r.getReadLength()); - } - - // todo -- this code and mismatchesInRefWindow should be combined and optimized into a single - // todo -- high performance implementation. We can do a lot better than this right now - public static MismatchCount getMismatchCount(SAMRecord r, byte[] refSeq, int refIndex, int startOnRead, int nReadBases) { - MismatchCount mc = new MismatchCount(); - - int readIdx = 0; - int endOnRead = startOnRead + nReadBases - 1; // index of the last base on read we want to count - byte[] readSeq = r.getReadBases(); - Cigar c = r.getCigar(); - for (int i = 0 ; i < c.numCigarElements() ; i++) { - - if ( readIdx > endOnRead ) break; - - CigarElement ce = c.getCigarElement(i); - switch ( ce.getOperator() ) { - case M: - for (int j = 0 ; j < ce.getLength() ; j++, refIndex++, readIdx++ ) { - if ( refIndex >= refSeq.length ) - continue; - if ( readIdx < startOnRead ) continue; - if ( readIdx > endOnRead ) break; - byte refChr = refSeq[refIndex]; - byte readChr = readSeq[readIdx]; - // Note: we need to count X/N's as mismatches because that's what SAM requires - //if ( BaseUtils.simpleBaseToBaseIndex(readChr) == -1 || - // BaseUtils.simpleBaseToBaseIndex(refChr) == -1 ) - // continue; // do not count Ns/Xs/etc ? - if ( readChr != refChr ) { - mc.numMismatches++; - mc.mismatchQualities += r.getBaseQualities()[readIdx]; - } - } - break; - case I: - case S: - readIdx += ce.getLength(); - break; - case D: - case N: - refIndex += ce.getLength(); - break; - case H: - case P: - break; - default: throw new ReviewedStingException("The " + ce.getOperator() + " cigar element is not currently supported"); - } - - } - return mc; - } - - /** Returns the number of mismatches in the pileup within the given reference context. - * - * @param pileup the pileup with reads - * @param ref the reference context - * @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window) - * @return the number of mismatches - */ - public static int mismatchesInRefWindow(ReadBackedPileup pileup, ReferenceContext ref, boolean ignoreTargetSite) { - int mismatches = 0; - for ( PileupElement p : pileup ) - mismatches += mismatchesInRefWindow(p, ref, ignoreTargetSite); - return mismatches; - } - - /** Returns the number of mismatches in the pileup element within the given reference context. - * - * @param p the pileup element - * @param ref the reference context - * @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window) - * @return the number of mismatches - */ - public static int mismatchesInRefWindow(PileupElement p, ReferenceContext ref, boolean ignoreTargetSite) { - return mismatchesInRefWindow(p, ref, ignoreTargetSite, false); - } - - /** Returns the number of mismatches in the pileup element within the given reference context. - * - * @param p the pileup element - * @param ref the reference context - * @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window) - * @param qualitySumInsteadOfMismatchCount if true, return the quality score sum of the mismatches rather than the count - * @return the number of mismatches - */ - public static int mismatchesInRefWindow(PileupElement p, ReferenceContext ref, boolean ignoreTargetSite, boolean qualitySumInsteadOfMismatchCount) { - int sum = 0; - - int windowStart = ref.getWindow().getStart(); - int windowStop = ref.getWindow().getStop(); - byte[] refBases = ref.getBases(); - byte[] readBases = p.getRead().getReadBases(); - byte[] readQualities = p.getRead().getBaseQualities(); - Cigar c = p.getRead().getCigar(); - - int readIndex = 0; - int currentPos = p.getRead().getAlignmentStart(); - int refIndex = Math.max(0, currentPos - windowStart); - - for (int i = 0 ; i < c.numCigarElements() ; i++) { - CigarElement ce = c.getCigarElement(i); - int cigarElementLength = ce.getLength(); - switch ( ce.getOperator() ) { - case M: - for (int j = 0; j < cigarElementLength; j++, readIndex++, currentPos++) { - // are we past the ref window? - if ( currentPos > windowStop ) - break; - - // are we before the ref window? - if ( currentPos < windowStart ) - continue; - - byte refChr = refBases[refIndex++]; - - // do we need to skip the target site? - if ( ignoreTargetSite && ref.getLocus().getStart() == currentPos ) - continue; - - byte readChr = readBases[readIndex]; - if ( readChr != refChr ) - sum += (qualitySumInsteadOfMismatchCount) ? readQualities[readIndex] : 1; - } - break; - case I: - case S: - readIndex += cigarElementLength; - break; - case D: - case N: - currentPos += cigarElementLength; - if ( currentPos > windowStart ) - refIndex += Math.min(cigarElementLength, currentPos - windowStart); - break; - case H: - case P: - break; - } - } - - return sum; - } - - /** Returns the number of mismatches in the pileup element within the given reference context. - * - * @param read the SAMRecord - * @param ref the reference context - * @param maxMismatches the maximum number of surrounding mismatches we tolerate to consider a base good - * @param windowSize window size (on each side) to test - * @return a bitset representing which bases are good - */ - public static BitSet mismatchesInRefWindow(SAMRecord read, ReferenceContext ref, int maxMismatches, int windowSize) { - // first determine the positions with mismatches - int readLength = read.getReadLength(); - BitSet mismatches = new BitSet(readLength); - - // it's possible we aren't starting at the beginning of a read, - // and we don't need to look at any of the previous context outside our window - // (although we do need future context) - int readStartPos = Math.max(read.getAlignmentStart(), ref.getLocus().getStart() - windowSize); - int currentReadPos = read.getAlignmentStart(); - - byte[] refBases = ref.getBases(); - int refIndex = readStartPos - ref.getWindow().getStart(); - if ( refIndex < 0 ) { - throw new IllegalStateException("When calculating mismatches, we somehow don't have enough previous reference context for read " + read.getReadName() + " at position " + ref.getLocus()); - } - - byte[] readBases = read.getReadBases(); - int readIndex = 0; - - Cigar c = read.getCigar(); - - for (int i = 0 ; i < c.numCigarElements() ; i++) { - CigarElement ce = c.getCigarElement(i); - int cigarElementLength = ce.getLength(); - switch ( ce.getOperator() ) { - case M: - for (int j = 0; j < cigarElementLength; j++, readIndex++) { - // skip over unwanted bases - if ( currentReadPos++ < readStartPos ) - continue; - - // this is possible if reads extend beyond the contig end - if ( refIndex >= refBases.length ) - break; - - byte refChr = refBases[refIndex]; - byte readChr = readBases[readIndex]; - if ( readChr != refChr ) - mismatches.set(readIndex); - - refIndex++; - } - break; - case I: - case S: - readIndex += cigarElementLength; - break; - case D: - case N: - if ( currentReadPos >= readStartPos ) - refIndex += cigarElementLength; - currentReadPos += cigarElementLength; - break; - case H: - case P: - break; - } - } - - // all bits are set to false by default - BitSet result = new BitSet(readLength); - - int currentPos = 0, leftPos = 0, rightPos; - int mismatchCount = 0; - - // calculate how many mismatches exist in the windows to the left/right - for ( rightPos = 1; rightPos <= windowSize && rightPos < readLength; rightPos++) { - if ( mismatches.get(rightPos) ) - mismatchCount++; - } - if ( mismatchCount <= maxMismatches ) - result.set(currentPos); - - // now, traverse over the read positions - while ( currentPos < readLength ) { - // add a new rightmost position - if ( rightPos < readLength && mismatches.get(rightPos++) ) - mismatchCount++; - // re-penalize the previous position - if ( mismatches.get(currentPos++) ) - mismatchCount++; - // don't penalize the current position - if ( mismatches.get(currentPos) ) - mismatchCount--; - // subtract the leftmost position - if ( leftPos < currentPos - windowSize && mismatches.get(leftPos++) ) - mismatchCount--; - - if ( mismatchCount <= maxMismatches ) - result.set(currentPos); - } - - return result; - } - /** Returns number of alignment blocks (continuous stretches of aligned bases) in the specified alignment. - * This method follows closely the SAMRecord::getAlignmentBlocks() implemented in samtools library, but - * it only counts blocks without actually allocating and filling the list of blocks themselves. Hence, this method is - * a much more efficient alternative to r.getAlignmentBlocks.size() in the situations when this number is all that is needed. - * Formally, this method simply returns the number of M elements in the cigar. - * @param r alignment - * @return number of continuous alignment blocks (i.e. 'M' elements of the cigar; all indel and clipping elements are ignored). - */ - public static int getNumAlignmentBlocks(final SAMRecord r) { - int n = 0; - final Cigar cigar = r.getCigar(); - if (cigar == null) return 0; - - for (final CigarElement e : cigar.getCigarElements()) { - if (e.getOperator() == CigarOperator.M ) n++; - } - - return n; - } - - public static int getNumAlignedBases(final SAMRecord r) { - int n = 0; - final Cigar cigar = r.getCigar(); - if (cigar == null) return 0; - - for (final CigarElement e : cigar.getCigarElements()) { - if (e.getOperator() == CigarOperator.M ) { n += e.getLength(); } - } - - return n; - } - - public static byte[] alignmentToByteArray( final Cigar cigar, final byte[] read, final byte[] ref ) { - - final byte[] alignment = new byte[read.length]; - int refPos = 0; - int alignPos = 0; - - for ( int iii = 0 ; iii < cigar.numCigarElements() ; iii++ ) { - - final CigarElement ce = cigar.getCigarElement(iii); - final int elementLength = ce.getLength(); - - switch( ce.getOperator() ) { - case I: - case S: - for ( int jjj = 0; jjj < elementLength; jjj++ ) { - alignment[alignPos++] = '+'; - } - break; - case D: - case N: - refPos += elementLength; - break; - case M: - for ( int jjj = 0; jjj < elementLength; jjj++ ) { - alignment[alignPos] = ref[refPos]; - alignPos++; - refPos++; - } - break; - case H: - case P: - break; - default: - throw new ReviewedStingException( "Unsupported cigar operator: " + ce.getOperator() ); - } - } - return alignment; - } - - public static int calcAlignmentByteArrayOffset( final Cigar cigar, int pileupOffset, final int alignmentStart, final int refLocus ) { - - boolean atDeletion = false; - if(pileupOffset == -1) { - atDeletion = true; - pileupOffset = refLocus - alignmentStart; - final CigarElement ce = cigar.getCigarElement(0); - if( ce.getOperator() == CigarOperator.S ) { - pileupOffset += ce.getLength(); - } - } - int pos = 0; - int alignmentPos = 0; - for ( int iii = 0 ; iii < cigar.numCigarElements() ; iii++ ) { - - final CigarElement ce = cigar.getCigarElement(iii); - final int elementLength = ce.getLength(); - - switch( ce.getOperator() ) { - case I: - case S: - pos += elementLength; - if( pos >= pileupOffset ) { - return alignmentPos; - } - break; - case D: - case N: - if(!atDeletion) { - alignmentPos += elementLength; - } else { - if( pos + elementLength - 1 >= pileupOffset ) { - return alignmentPos + (pileupOffset - pos); - } else { - pos += elementLength; - alignmentPos += elementLength; - } - } - break; - case M: - if( pos + elementLength - 1 >= pileupOffset ) { - return alignmentPos + (pileupOffset - pos); - } else { - pos += elementLength; - alignmentPos += elementLength; - } - break; - case H: - case P: - break; - default: - throw new ReviewedStingException( "Unsupported cigar operator: " + ce.getOperator() ); - } - } - return alignmentPos; - } - - public static byte[] readToAlignmentByteArray( final Cigar cigar, final byte[] read ) { - - int alignmentLength = 0; - for ( int iii = 0 ; iii < cigar.numCigarElements() ; iii++ ) { - - final CigarElement ce = cigar.getCigarElement(iii); - final int elementLength = ce.getLength(); - - switch( ce.getOperator() ) { - case I: - case S: - break; - case D: - case N: - alignmentLength += elementLength; - break; - case M: - alignmentLength += elementLength; - break; - case H: - case P: - break; - default: - throw new ReviewedStingException( "Unsupported cigar operator: " + ce.getOperator() ); - } - } - - final byte[] alignment = new byte[alignmentLength]; - int alignPos = 0; - int readPos = 0; - for ( int iii = 0 ; iii < cigar.numCigarElements() ; iii++ ) { - - final CigarElement ce = cigar.getCigarElement(iii); - final int elementLength = ce.getLength(); - - switch( ce.getOperator() ) { - case I: - if( alignPos > 0 ) { - if( alignment[alignPos-1] == BaseUtils.A ) { alignment[alignPos-1] = PileupElement.A_FOLLOWED_BY_INSERTION_BASE; } - else if( alignment[alignPos-1] == BaseUtils.C ) { alignment[alignPos-1] = PileupElement.C_FOLLOWED_BY_INSERTION_BASE; } - else if( alignment[alignPos-1] == BaseUtils.T ) { alignment[alignPos-1] = PileupElement.T_FOLLOWED_BY_INSERTION_BASE; } - else if( alignment[alignPos-1] == BaseUtils.G ) { alignment[alignPos-1] = PileupElement.G_FOLLOWED_BY_INSERTION_BASE; } - } - case S: - for ( int jjj = 0; jjj < elementLength; jjj++ ) { - readPos++; - } - break; - case D: - case N: - for ( int jjj = 0; jjj < elementLength; jjj++ ) { - alignment[alignPos] = PileupElement.DELETION_BASE; - alignPos++; - } - break; - case M: - for ( int jjj = 0; jjj < elementLength; jjj++ ) { - alignment[alignPos] = read[readPos]; - alignPos++; - readPos++; - } - break; - case H: - case P: - break; - default: - throw new ReviewedStingException( "Unsupported cigar operator: " + ce.getOperator() ); - } - } - return alignment; - } - - /** - * Due to (unfortunate) multiple ways to indicate that read is unmapped allowed by SAM format - * specification, one may need this convenience shortcut. Checks both 'read unmapped' flag and - * alignment reference index/start. - * @param r record - * @return true if read is unmapped - */ - public static boolean isReadUnmapped(final SAMRecord r) { - if ( r.getReadUnmappedFlag() ) return true; - - // our life would be so much easier if all sam files followed the specs. In reality, - // sam files (including those generated by maq or bwa) miss headers altogether. When - // reading such a SAM file, reference name is set, but since there is no sequence dictionary, - // null is always returned for referenceIndex. Let's be paranoid here, and make sure that - // we do not call the read "unmapped" when it has only reference name set with ref. index missing - // or vice versa. - if ( ( r.getReferenceIndex() != null && r.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX - || r.getReferenceName() != null && !r.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME) ) - && r.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START ) return false ; - return true; - } - - /** - * Due to (unfortunate) multiple ways to indicate that read/mate is unmapped allowed by SAM format - * specification, one may need this convenience shortcut. Checks both 'mate unmapped' flag and - * alignment reference index/start of the mate. - * @param r sam record for the read - * @return true if read's mate is unmapped - */ - public static boolean isMateUnmapped(final SAMRecord r) { - if ( r.getMateUnmappedFlag() ) return true; - - // our life would be so much easier if all sam files followed the specs. In reality, - // sam files (including those generated by maq or bwa) miss headers altogether. When - // reading such a SAM file, reference name is set, but since there is no sequence dictionary, - // null is always returned for referenceIndex. Let's be paranoid here, and make sure that - // we do not call the read "unmapped" when it has only reference name set with ref. index missing - // or vice versa. - if ( ( r.getMateReferenceIndex() != null && r.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX - || r.getMateReferenceName() != null && !r.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME) ) - && r.getMateAlignmentStart() != SAMRecord.NO_ALIGNMENT_START ) return false ; - return true; - } - - /** Returns true is read is mapped and mapped uniquely (Q>0). - * - * @param read - * @return - */ - public static boolean isReadUniquelyMapped(SAMRecord read) { - return ( ! AlignmentUtils.isReadUnmapped(read) ) && read.getMappingQuality() > 0; - } - - /** Returns the array of base qualitites in the order the bases were read on the machine (i.e. always starting from - * cycle 1). In other words, if the read is unmapped or aligned in the forward direction, the read's own base - * qualities are returned as stored in the SAM record; if the read is aligned in the reverse direction, the array - * of read's base qualitites is inverted (in this case new array is allocated and returned). - * @param read - * @return - */ - public static byte [] getQualsInCycleOrder(SAMRecord read) { - if ( isReadUnmapped(read) || ! read.getReadNegativeStrandFlag() ) return read.getBaseQualities(); - - return Utils.reverse(read.getBaseQualities()); - } - - /** Returns the array of original base qualitites (before recalibration) in the order the bases were read on the machine (i.e. always starting from - * cycle 1). In other words, if the read is unmapped or aligned in the forward direction, the read's own base - * qualities are returned as stored in the SAM record; if the read is aligned in the reverse direction, the array - * of read's base qualitites is inverted (in this case new array is allocated and returned). If no original base qualities - * are available this method will throw a runtime exception. - * @param read - * @return - */ - public static byte [] getOriginalQualsInCycleOrder(SAMRecord read) { - if ( isReadUnmapped(read) || ! read.getReadNegativeStrandFlag() ) return read.getOriginalBaseQualities(); - - return Utils.reverse(read.getOriginalBaseQualities()); - } - - /** Takes the alignment of the read sequence readSeq to the reference sequence refSeq - * starting at 0-based position refIndex on the refSeq and specified by its cigar. - * The last argument readIndex specifies 0-based position on the read where the alignment described by the - * cigar starts. Usually cigars specify alignments of the whole read to the ref, so that readIndex is normally 0. - * Use non-zero readIndex only when the alignment cigar represents alignment of a part of the read. The refIndex in this case - * should be the position where the alignment of that part of the read starts at. In other words, both refIndex and readIndex are - * always the positions where the cigar starts on the ref and on the read, respectively. - * - * If the alignment has an indel, then this method attempts moving this indel left across a stretch of repetitive bases. For instance, if the original cigar - * specifies that (any) one AT is deleted from a repeat sequence TATATATA, the output cigar will always mark the leftmost AT - * as deleted. If there is no indel in the original cigar, or the indel position is determined unambiguously (i.e. inserted/deleted sequence - * is not repeated), the original cigar is returned. - * @param cigar structure of the original alignment - * @param refSeq reference sequence the read is aligned to - * @param readSeq read sequence - * @param refIndex 0-based alignment start position on ref - * @param readIndex 0-based alignment start position on read - * @return a cigar, in which indel is guaranteed to be placed at the leftmost possible position across a repeat (if any) - */ - public static Cigar leftAlignIndel(Cigar cigar, final byte[] refSeq, final byte[] readSeq, final int refIndex, final int readIndex) { - - int indexOfIndel = -1; - for ( int i = 0; i < cigar.numCigarElements(); i++ ) { - CigarElement ce = cigar.getCigarElement(i); - if ( ce.getOperator() == CigarOperator.D || ce.getOperator() == CigarOperator.I ) { - // if there is more than 1 indel, don't left align - if ( indexOfIndel != -1 ) - return cigar; - indexOfIndel = i; - } - } - - // if there is no indel or if the alignment starts with an insertion (so that there - // is no place on the read to move that insertion further left), we are done - if ( indexOfIndel < 1 ) return cigar; - - final int indelLength = cigar.getCigarElement(indexOfIndel).getLength(); - - byte[] altString = createIndelString(cigar, indexOfIndel, refSeq, readSeq, refIndex, readIndex); - if ( altString == null ) - return cigar; - - Cigar newCigar = cigar; - for ( int i = 0; i < indelLength; i++ ) { - newCigar = moveCigarLeft(newCigar, indexOfIndel); - byte[] newAltString = createIndelString(newCigar, indexOfIndel, refSeq, readSeq, refIndex, readIndex); - - // check to make sure we haven't run off the end of the read - boolean reachedEndOfRead = cigarHasZeroSizeElement(newCigar); - - if ( Arrays.equals(altString, newAltString) ) { - cigar = newCigar; - i = -1; - if ( reachedEndOfRead ) - cigar = cleanUpCigar(cigar); - } - - if ( reachedEndOfRead ) - break; - } - - return cigar; - } - - private static boolean cigarHasZeroSizeElement(Cigar c) { - for ( CigarElement ce : c.getCigarElements() ) { - if ( ce.getLength() == 0 ) - return true; - } - return false; - } - - private static Cigar cleanUpCigar(Cigar c) { - ArrayList elements = new ArrayList(c.numCigarElements()-1); - for ( CigarElement ce : c.getCigarElements() ) { - if ( ce.getLength() != 0 && - (elements.size() != 0 || ce.getOperator() != CigarOperator.D) ) { - elements.add(ce); - } - } - return new Cigar(elements); - } - - private static Cigar moveCigarLeft(Cigar cigar, int indexOfIndel) { - // get the first few elements - ArrayList elements = new ArrayList(cigar.numCigarElements()); - for ( int i = 0; i < indexOfIndel - 1; i++) - elements.add(cigar.getCigarElement(i)); - - // get the indel element and move it left one base - CigarElement ce = cigar.getCigarElement(indexOfIndel-1); - elements.add(new CigarElement(ce.getLength()-1, ce.getOperator())); - elements.add(cigar.getCigarElement(indexOfIndel)); - if ( indexOfIndel+1 < cigar.numCigarElements() ) { - ce = cigar.getCigarElement(indexOfIndel+1); - elements.add(new CigarElement(ce.getLength()+1, ce.getOperator())); - } else { - elements.add(new CigarElement(1, CigarOperator.M)); - } - - // get the last few elements - for ( int i = indexOfIndel + 2; i < cigar.numCigarElements(); i++) - elements.add(cigar.getCigarElement(i)); - return new Cigar(elements); - } - - private static byte[] createIndelString(final Cigar cigar, final int indexOfIndel, final byte[] refSeq, final byte[] readSeq, int refIndex, int readIndex) { - CigarElement indel = cigar.getCigarElement(indexOfIndel); - int indelLength = indel.getLength(); - - int totalRefBases = 0; - for ( int i = 0; i < indexOfIndel; i++ ) { - CigarElement ce = cigar.getCigarElement(i); - int length = ce.getLength(); - - switch( ce.getOperator() ) { - case M: - readIndex += length; - refIndex += length; - totalRefBases += length; - break; - case S: - readIndex += length; - break; - case N: - refIndex += length; - totalRefBases += length; - break; - default: - break; - } - } - - // sometimes, when there are very large known indels, we won't have enough reference sequence to cover them - if ( totalRefBases + indelLength > refSeq.length ) - indelLength -= (totalRefBases + indelLength - refSeq.length); - - // the indel-based reference string - byte[] alt = new byte[refSeq.length + (indelLength * (indel.getOperator() == CigarOperator.D ? -1 : 1))]; - - // add the bases before the indel, making sure it's not aligned off the end of the reference - if ( refIndex > alt.length || refIndex > refSeq.length ) - return null; - System.arraycopy(refSeq, 0, alt, 0, refIndex); - int currentPos = refIndex; - - // take care of the indel - if ( indel.getOperator() == CigarOperator.D ) { - refIndex += indelLength; - } else { - System.arraycopy(readSeq, readIndex, alt, currentPos, indelLength); - currentPos += indelLength; - } - - // add the bases after the indel, making sure it's not aligned off the end of the reference - if ( refSeq.length - refIndex > alt.length - currentPos ) - return null; - System.arraycopy(refSeq, refIndex, alt, currentPos, refSeq.length - refIndex); - - return alt; - } -} +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.sam; + +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; + + +public class AlignmentUtils { + + public static class MismatchCount { + public int numMismatches = 0; + public long mismatchQualities = 0; + } + + public static long mismatchingQualities(SAMRecord r, byte[] refSeq, int refIndex) { + return getMismatchCount(r, refSeq, refIndex).mismatchQualities; + } + + public static MismatchCount getMismatchCount(SAMRecord r, byte[] refSeq, int refIndex) { + return getMismatchCount(r, refSeq, refIndex, 0, r.getReadLength()); + } + + // todo -- this code and mismatchesInRefWindow should be combined and optimized into a single + // todo -- high performance implementation. We can do a lot better than this right now + public static MismatchCount getMismatchCount(SAMRecord r, byte[] refSeq, int refIndex, int startOnRead, int nReadBases) { + MismatchCount mc = new MismatchCount(); + + int readIdx = 0; + int endOnRead = startOnRead + nReadBases - 1; // index of the last base on read we want to count + byte[] readSeq = r.getReadBases(); + Cigar c = r.getCigar(); + for (int i = 0; i < c.numCigarElements(); i++) { + + if (readIdx > endOnRead) break; + + CigarElement ce = c.getCigarElement(i); + switch (ce.getOperator()) { + case M: + for (int j = 0; j < ce.getLength(); j++, refIndex++, readIdx++) { + if (refIndex >= refSeq.length) + continue; + if (readIdx < startOnRead) continue; + if (readIdx > endOnRead) break; + byte refChr = refSeq[refIndex]; + byte readChr = readSeq[readIdx]; + // Note: we need to count X/N's as mismatches because that's what SAM requires + //if ( BaseUtils.simpleBaseToBaseIndex(readChr) == -1 || + // BaseUtils.simpleBaseToBaseIndex(refChr) == -1 ) + // continue; // do not count Ns/Xs/etc ? + if (readChr != refChr) { + mc.numMismatches++; + mc.mismatchQualities += r.getBaseQualities()[readIdx]; + } + } + break; + case I: + case S: + readIdx += ce.getLength(); + break; + case D: + case N: + refIndex += ce.getLength(); + break; + case H: + case P: + break; + default: + throw new ReviewedStingException("The " + ce.getOperator() + " cigar element is not currently supported"); + } + + } + return mc; + } + + /** + * Returns the number of mismatches in the pileup within the given reference context. + * + * @param pileup the pileup with reads + * @param ref the reference context + * @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window) + * @return the number of mismatches + */ + public static int mismatchesInRefWindow(ReadBackedPileup pileup, ReferenceContext ref, boolean ignoreTargetSite) { + int mismatches = 0; + for (PileupElement p : pileup) + mismatches += mismatchesInRefWindow(p, ref, ignoreTargetSite); + return mismatches; + } + + /** + * Returns the number of mismatches in the pileup element within the given reference context. + * + * @param p the pileup element + * @param ref the reference context + * @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window) + * @return the number of mismatches + */ + public static int mismatchesInRefWindow(PileupElement p, ReferenceContext ref, boolean ignoreTargetSite) { + return mismatchesInRefWindow(p, ref, ignoreTargetSite, false); + } + + /** + * Returns the number of mismatches in the pileup element within the given reference context. + * + * @param p the pileup element + * @param ref the reference context + * @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window) + * @param qualitySumInsteadOfMismatchCount + * if true, return the quality score sum of the mismatches rather than the count + * @return the number of mismatches + */ + public static int mismatchesInRefWindow(PileupElement p, ReferenceContext ref, boolean ignoreTargetSite, boolean qualitySumInsteadOfMismatchCount) { + int sum = 0; + + int windowStart = ref.getWindow().getStart(); + int windowStop = ref.getWindow().getStop(); + byte[] refBases = ref.getBases(); + byte[] readBases = p.getRead().getReadBases(); + byte[] readQualities = p.getRead().getBaseQualities(); + Cigar c = p.getRead().getCigar(); + + int readIndex = 0; + int currentPos = p.getRead().getAlignmentStart(); + int refIndex = Math.max(0, currentPos - windowStart); + + for (int i = 0; i < c.numCigarElements(); i++) { + CigarElement ce = c.getCigarElement(i); + int cigarElementLength = ce.getLength(); + switch (ce.getOperator()) { + case M: + for (int j = 0; j < cigarElementLength; j++, readIndex++, currentPos++) { + // are we past the ref window? + if (currentPos > windowStop) + break; + + // are we before the ref window? + if (currentPos < windowStart) + continue; + + byte refChr = refBases[refIndex++]; + + // do we need to skip the target site? + if (ignoreTargetSite && ref.getLocus().getStart() == currentPos) + continue; + + byte readChr = readBases[readIndex]; + if (readChr != refChr) + sum += (qualitySumInsteadOfMismatchCount) ? readQualities[readIndex] : 1; + } + break; + case I: + case S: + readIndex += cigarElementLength; + break; + case D: + case N: + currentPos += cigarElementLength; + if (currentPos > windowStart) + refIndex += Math.min(cigarElementLength, currentPos - windowStart); + break; + case H: + case P: + break; + } + } + + return sum; + } + + /** + * Returns the number of mismatches in the pileup element within the given reference context. + * + * @param read the SAMRecord + * @param ref the reference context + * @param maxMismatches the maximum number of surrounding mismatches we tolerate to consider a base good + * @param windowSize window size (on each side) to test + * @return a bitset representing which bases are good + */ + public static BitSet mismatchesInRefWindow(SAMRecord read, ReferenceContext ref, int maxMismatches, int windowSize) { + // first determine the positions with mismatches + int readLength = read.getReadLength(); + BitSet mismatches = new BitSet(readLength); + + // it's possible we aren't starting at the beginning of a read, + // and we don't need to look at any of the previous context outside our window + // (although we do need future context) + int readStartPos = Math.max(read.getAlignmentStart(), ref.getLocus().getStart() - windowSize); + int currentReadPos = read.getAlignmentStart(); + + byte[] refBases = ref.getBases(); + int refIndex = readStartPos - ref.getWindow().getStart(); + if (refIndex < 0) { + throw new IllegalStateException("When calculating mismatches, we somehow don't have enough previous reference context for read " + read.getReadName() + " at position " + ref.getLocus()); + } + + byte[] readBases = read.getReadBases(); + int readIndex = 0; + + Cigar c = read.getCigar(); + + for (int i = 0; i < c.numCigarElements(); i++) { + CigarElement ce = c.getCigarElement(i); + int cigarElementLength = ce.getLength(); + switch (ce.getOperator()) { + case M: + for (int j = 0; j < cigarElementLength; j++, readIndex++) { + // skip over unwanted bases + if (currentReadPos++ < readStartPos) + continue; + + // this is possible if reads extend beyond the contig end + if (refIndex >= refBases.length) + break; + + byte refChr = refBases[refIndex]; + byte readChr = readBases[readIndex]; + if (readChr != refChr) + mismatches.set(readIndex); + + refIndex++; + } + break; + case I: + case S: + readIndex += cigarElementLength; + break; + case D: + case N: + if (currentReadPos >= readStartPos) + refIndex += cigarElementLength; + currentReadPos += cigarElementLength; + break; + case H: + case P: + break; + } + } + + // all bits are set to false by default + BitSet result = new BitSet(readLength); + + int currentPos = 0, leftPos = 0, rightPos; + int mismatchCount = 0; + + // calculate how many mismatches exist in the windows to the left/right + for (rightPos = 1; rightPos <= windowSize && rightPos < readLength; rightPos++) { + if (mismatches.get(rightPos)) + mismatchCount++; + } + if (mismatchCount <= maxMismatches) + result.set(currentPos); + + // now, traverse over the read positions + while (currentPos < readLength) { + // add a new rightmost position + if (rightPos < readLength && mismatches.get(rightPos++)) + mismatchCount++; + // re-penalize the previous position + if (mismatches.get(currentPos++)) + mismatchCount++; + // don't penalize the current position + if (mismatches.get(currentPos)) + mismatchCount--; + // subtract the leftmost position + if (leftPos < currentPos - windowSize && mismatches.get(leftPos++)) + mismatchCount--; + + if (mismatchCount <= maxMismatches) + result.set(currentPos); + } + + return result; + } + + /** + * Returns number of alignment blocks (continuous stretches of aligned bases) in the specified alignment. + * This method follows closely the SAMRecord::getAlignmentBlocks() implemented in samtools library, but + * it only counts blocks without actually allocating and filling the list of blocks themselves. Hence, this method is + * a much more efficient alternative to r.getAlignmentBlocks.size() in the situations when this number is all that is needed. + * Formally, this method simply returns the number of M elements in the cigar. + * + * @param r alignment + * @return number of continuous alignment blocks (i.e. 'M' elements of the cigar; all indel and clipping elements are ignored). + */ + public static int getNumAlignmentBlocks(final SAMRecord r) { + int n = 0; + final Cigar cigar = r.getCigar(); + if (cigar == null) return 0; + + for (final CigarElement e : cigar.getCigarElements()) { + if (e.getOperator() == CigarOperator.M) n++; + } + + return n; + } + + public static int getNumAlignedBases(final SAMRecord r) { + int n = 0; + final Cigar cigar = r.getCigar(); + if (cigar == null) return 0; + + for (final CigarElement e : cigar.getCigarElements()) + if (e.getOperator() == CigarOperator.M) + n += e.getLength(); + + return n; + } + + public static byte[] alignmentToByteArray(final Cigar cigar, final byte[] read, final byte[] ref) { + + final byte[] alignment = new byte[read.length]; + int refPos = 0; + int alignPos = 0; + + for (int iii = 0; iii < cigar.numCigarElements(); iii++) { + + final CigarElement ce = cigar.getCigarElement(iii); + final int elementLength = ce.getLength(); + + switch (ce.getOperator()) { + case I: + case S: + for (int jjj = 0; jjj < elementLength; jjj++) { + alignment[alignPos++] = '+'; + } + break; + case D: + case N: + refPos += elementLength; + break; + case M: + for (int jjj = 0; jjj < elementLength; jjj++) { + alignment[alignPos] = ref[refPos]; + alignPos++; + refPos++; + } + break; + case H: + case P: + break; + default: + throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator()); + } + } + return alignment; + } + + public static int calcAlignmentByteArrayOffset(final Cigar cigar, PileupElement pileup, final int alignmentStart, final int refLocus) { + int pileupOffset = pileup.getOffset(); + + // Special case for reads starting with insertion + if (pileup.isInsertionAtBeginningOfRead()) + return 0; + + // Reassign the offset if we are in the middle of a deletion because of the modified representation of the read bases + if (pileup.isDeletion()) { + pileupOffset = refLocus - alignmentStart; + final CigarElement ce = cigar.getCigarElement(0); + if (ce.getOperator() == CigarOperator.S) { + pileupOffset += ce.getLength(); + } + } + + int pos = 0; + int alignmentPos = 0; + + for (int iii = 0; iii < cigar.numCigarElements(); iii++) { + final CigarElement ce = cigar.getCigarElement(iii); + final int elementLength = ce.getLength(); + + switch (ce.getOperator()) { + case I: + case S: + pos += elementLength; + if (pos >= pileupOffset) { + return alignmentPos; + } + break; + case D: + case N: + if (!pileup.isDeletion()) { + alignmentPos += elementLength; + } else { + if (pos + elementLength - 1 >= pileupOffset) { + return alignmentPos + (pileupOffset - pos); + } else { + pos += elementLength; + alignmentPos += elementLength; + } + } + break; + case M: + if (pos + elementLength - 1 >= pileupOffset) { + return alignmentPos + (pileupOffset - pos); + } else { + pos += elementLength; + alignmentPos += elementLength; + } + break; + case H: + case P: + break; + default: + throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator()); + } + } + + return alignmentPos; + } + + public static byte[] readToAlignmentByteArray(final Cigar cigar, final byte[] read) { + + int alignmentLength = 0; + for (int iii = 0; iii < cigar.numCigarElements(); iii++) { + + final CigarElement ce = cigar.getCigarElement(iii); + final int elementLength = ce.getLength(); + + switch (ce.getOperator()) { + case I: + case S: + break; + case D: + case N: + alignmentLength += elementLength; + break; + case M: + alignmentLength += elementLength; + break; + case H: + case P: + break; + default: + throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator()); + } + } + + final byte[] alignment = new byte[alignmentLength]; + int alignPos = 0; + int readPos = 0; + for (int iii = 0; iii < cigar.numCigarElements(); iii++) { + + final CigarElement ce = cigar.getCigarElement(iii); + final int elementLength = ce.getLength(); + + switch (ce.getOperator()) { + case I: + if (alignPos > 0) { + if (alignment[alignPos - 1] == BaseUtils.A) { + alignment[alignPos - 1] = PileupElement.A_FOLLOWED_BY_INSERTION_BASE; + } else if (alignment[alignPos - 1] == BaseUtils.C) { + alignment[alignPos - 1] = PileupElement.C_FOLLOWED_BY_INSERTION_BASE; + } else if (alignment[alignPos - 1] == BaseUtils.T) { + alignment[alignPos - 1] = PileupElement.T_FOLLOWED_BY_INSERTION_BASE; + } else if (alignment[alignPos - 1] == BaseUtils.G) { + alignment[alignPos - 1] = PileupElement.G_FOLLOWED_BY_INSERTION_BASE; + } + } + case S: + for (int jjj = 0; jjj < elementLength; jjj++) { + readPos++; + } + break; + case D: + case N: + for (int jjj = 0; jjj < elementLength; jjj++) { + alignment[alignPos] = PileupElement.DELETION_BASE; + alignPos++; + } + break; + case M: + for (int jjj = 0; jjj < elementLength; jjj++) { + alignment[alignPos] = read[readPos]; + alignPos++; + readPos++; + } + break; + case H: + case P: + break; + default: + throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator()); + } + } + return alignment; + } + + /** + * Due to (unfortunate) multiple ways to indicate that read is unmapped allowed by SAM format + * specification, one may need this convenience shortcut. Checks both 'read unmapped' flag and + * alignment reference index/start. + * + * @param r record + * @return true if read is unmapped + */ + public static boolean isReadUnmapped(final SAMRecord r) { + if (r.getReadUnmappedFlag()) return true; + + // our life would be so much easier if all sam files followed the specs. In reality, + // sam files (including those generated by maq or bwa) miss headers altogether. When + // reading such a SAM file, reference name is set, but since there is no sequence dictionary, + // null is always returned for referenceIndex. Let's be paranoid here, and make sure that + // we do not call the read "unmapped" when it has only reference name set with ref. index missing + // or vice versa. + if ((r.getReferenceIndex() != null && r.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX + || r.getReferenceName() != null && !r.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) + && r.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START) return false; + return true; + } + + /** + * Due to (unfortunate) multiple ways to indicate that read/mate is unmapped allowed by SAM format + * specification, one may need this convenience shortcut. Checks both 'mate unmapped' flag and + * alignment reference index/start of the mate. + * + * @param r sam record for the read + * @return true if read's mate is unmapped + */ + public static boolean isMateUnmapped(final SAMRecord r) { + if (r.getMateUnmappedFlag()) return true; + + // our life would be so much easier if all sam files followed the specs. In reality, + // sam files (including those generated by maq or bwa) miss headers altogether. When + // reading such a SAM file, reference name is set, but since there is no sequence dictionary, + // null is always returned for referenceIndex. Let's be paranoid here, and make sure that + // we do not call the read "unmapped" when it has only reference name set with ref. index missing + // or vice versa. + if ((r.getMateReferenceIndex() != null && r.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX + || r.getMateReferenceName() != null && !r.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) + && r.getMateAlignmentStart() != SAMRecord.NO_ALIGNMENT_START) return false; + return true; + } + + /** + * Returns true is read is mapped and mapped uniquely (Q>0). + * + * @param read + * @return + */ + public static boolean isReadUniquelyMapped(SAMRecord read) { + return (!AlignmentUtils.isReadUnmapped(read)) && read.getMappingQuality() > 0; + } + + /** + * Returns the array of base qualitites in the order the bases were read on the machine (i.e. always starting from + * cycle 1). In other words, if the read is unmapped or aligned in the forward direction, the read's own base + * qualities are returned as stored in the SAM record; if the read is aligned in the reverse direction, the array + * of read's base qualitites is inverted (in this case new array is allocated and returned). + * + * @param read + * @return + */ + public static byte[] getQualsInCycleOrder(SAMRecord read) { + if (isReadUnmapped(read) || !read.getReadNegativeStrandFlag()) return read.getBaseQualities(); + + return Utils.reverse(read.getBaseQualities()); + } + + /** + * Returns the array of original base qualitites (before recalibration) in the order the bases were read on the machine (i.e. always starting from + * cycle 1). In other words, if the read is unmapped or aligned in the forward direction, the read's own base + * qualities are returned as stored in the SAM record; if the read is aligned in the reverse direction, the array + * of read's base qualitites is inverted (in this case new array is allocated and returned). If no original base qualities + * are available this method will throw a runtime exception. + * + * @param read + * @return + */ + public static byte[] getOriginalQualsInCycleOrder(SAMRecord read) { + if (isReadUnmapped(read) || !read.getReadNegativeStrandFlag()) return read.getOriginalBaseQualities(); + + return Utils.reverse(read.getOriginalBaseQualities()); + } + + /** + * Takes the alignment of the read sequence readSeq to the reference sequence refSeq + * starting at 0-based position refIndex on the refSeq and specified by its cigar. + * The last argument readIndex specifies 0-based position on the read where the alignment described by the + * cigar starts. Usually cigars specify alignments of the whole read to the ref, so that readIndex is normally 0. + * Use non-zero readIndex only when the alignment cigar represents alignment of a part of the read. The refIndex in this case + * should be the position where the alignment of that part of the read starts at. In other words, both refIndex and readIndex are + * always the positions where the cigar starts on the ref and on the read, respectively. + *

+ * If the alignment has an indel, then this method attempts moving this indel left across a stretch of repetitive bases. For instance, if the original cigar + * specifies that (any) one AT is deleted from a repeat sequence TATATATA, the output cigar will always mark the leftmost AT + * as deleted. If there is no indel in the original cigar, or the indel position is determined unambiguously (i.e. inserted/deleted sequence + * is not repeated), the original cigar is returned. + * + * @param cigar structure of the original alignment + * @param refSeq reference sequence the read is aligned to + * @param readSeq read sequence + * @param refIndex 0-based alignment start position on ref + * @param readIndex 0-based alignment start position on read + * @return a cigar, in which indel is guaranteed to be placed at the leftmost possible position across a repeat (if any) + */ + public static Cigar leftAlignIndel(Cigar cigar, final byte[] refSeq, final byte[] readSeq, final int refIndex, final int readIndex) { + + int indexOfIndel = -1; + for (int i = 0; i < cigar.numCigarElements(); i++) { + CigarElement ce = cigar.getCigarElement(i); + if (ce.getOperator() == CigarOperator.D || ce.getOperator() == CigarOperator.I) { + // if there is more than 1 indel, don't left align + if (indexOfIndel != -1) + return cigar; + indexOfIndel = i; + } + } + + // if there is no indel or if the alignment starts with an insertion (so that there + // is no place on the read to move that insertion further left), we are done + if (indexOfIndel < 1) return cigar; + + final int indelLength = cigar.getCigarElement(indexOfIndel).getLength(); + + byte[] altString = createIndelString(cigar, indexOfIndel, refSeq, readSeq, refIndex, readIndex); + if (altString == null) + return cigar; + + Cigar newCigar = cigar; + for (int i = 0; i < indelLength; i++) { + newCigar = moveCigarLeft(newCigar, indexOfIndel); + byte[] newAltString = createIndelString(newCigar, indexOfIndel, refSeq, readSeq, refIndex, readIndex); + + // check to make sure we haven't run off the end of the read + boolean reachedEndOfRead = cigarHasZeroSizeElement(newCigar); + + if (Arrays.equals(altString, newAltString)) { + cigar = newCigar; + i = -1; + if (reachedEndOfRead) + cigar = cleanUpCigar(cigar); + } + + if (reachedEndOfRead) + break; + } + + return cigar; + } + + private static boolean cigarHasZeroSizeElement(Cigar c) { + for (CigarElement ce : c.getCigarElements()) { + if (ce.getLength() == 0) + return true; + } + return false; + } + + private static Cigar cleanUpCigar(Cigar c) { + ArrayList elements = new ArrayList(c.numCigarElements() - 1); + for (CigarElement ce : c.getCigarElements()) { + if (ce.getLength() != 0 && + (elements.size() != 0 || ce.getOperator() != CigarOperator.D)) { + elements.add(ce); + } + } + return new Cigar(elements); + } + + private static Cigar moveCigarLeft(Cigar cigar, int indexOfIndel) { + // get the first few elements + ArrayList elements = new ArrayList(cigar.numCigarElements()); + for (int i = 0; i < indexOfIndel - 1; i++) + elements.add(cigar.getCigarElement(i)); + + // get the indel element and move it left one base + CigarElement ce = cigar.getCigarElement(indexOfIndel - 1); + elements.add(new CigarElement(ce.getLength() - 1, ce.getOperator())); + elements.add(cigar.getCigarElement(indexOfIndel)); + if (indexOfIndel + 1 < cigar.numCigarElements()) { + ce = cigar.getCigarElement(indexOfIndel + 1); + elements.add(new CigarElement(ce.getLength() + 1, ce.getOperator())); + } else { + elements.add(new CigarElement(1, CigarOperator.M)); + } + + // get the last few elements + for (int i = indexOfIndel + 2; i < cigar.numCigarElements(); i++) + elements.add(cigar.getCigarElement(i)); + return new Cigar(elements); + } + + private static byte[] createIndelString(final Cigar cigar, final int indexOfIndel, final byte[] refSeq, final byte[] readSeq, int refIndex, int readIndex) { + CigarElement indel = cigar.getCigarElement(indexOfIndel); + int indelLength = indel.getLength(); + + int totalRefBases = 0; + for (int i = 0; i < indexOfIndel; i++) { + CigarElement ce = cigar.getCigarElement(i); + int length = ce.getLength(); + + switch (ce.getOperator()) { + case M: + readIndex += length; + refIndex += length; + totalRefBases += length; + break; + case S: + readIndex += length; + break; + case N: + refIndex += length; + totalRefBases += length; + break; + default: + break; + } + } + + // sometimes, when there are very large known indels, we won't have enough reference sequence to cover them + if (totalRefBases + indelLength > refSeq.length) + indelLength -= (totalRefBases + indelLength - refSeq.length); + + // the indel-based reference string + byte[] alt = new byte[refSeq.length + (indelLength * (indel.getOperator() == CigarOperator.D ? -1 : 1))]; + + // add the bases before the indel, making sure it's not aligned off the end of the reference + if (refIndex > alt.length || refIndex > refSeq.length) + return null; + System.arraycopy(refSeq, 0, alt, 0, refIndex); + int currentPos = refIndex; + + // take care of the indel + if (indel.getOperator() == CigarOperator.D) { + refIndex += indelLength; + } else { + System.arraycopy(readSeq, readIndex, alt, currentPos, indelLength); + currentPos += indelLength; + } + + // add the bases after the indel, making sure it's not aligned off the end of the reference + if (refSeq.length - refIndex > alt.length - currentPos) + return null; + System.arraycopy(refSeq, refIndex, alt, currentPos, refSeq.length - refIndex); + + return alt; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java index 542adea77..8661d5ad0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java @@ -27,7 +27,7 @@ public class ArtificialSAMUtils { * @param chromosomeSize how large each chromosome is * @param readsPerChomosome how many reads to make in each chromosome. They'll be aligned from position 1 to x (which is the number of reads) */ - public static void createArtificialBamFile( String filename, int numberOfChromosomes, int startingChromosome, int chromosomeSize, int readsPerChomosome ) { + public static void createArtificialBamFile(String filename, int numberOfChromosomes, int startingChromosome, int chromosomeSize, int readsPerChomosome) { SAMFileHeader header = createArtificialSamHeader(numberOfChromosomes, startingChromosome, chromosomeSize); File outFile = new File(filename); @@ -51,7 +51,7 @@ public class ArtificialSAMUtils { * @param chromosomeSize how large each chromosome is * @param readsPerChomosome how many reads to make in each chromosome. They'll be aligned from position 1 to x (which is the number of reads) */ - public static void createArtificialSamFile( String filename, int numberOfChromosomes, int startingChromosome, int chromosomeSize, int readsPerChomosome ) { + public static void createArtificialSamFile(String filename, int numberOfChromosomes, int startingChromosome, int chromosomeSize, int readsPerChomosome) { SAMFileHeader header = createArtificialSamHeader(numberOfChromosomes, startingChromosome, chromosomeSize); File outFile = new File(filename); @@ -72,16 +72,15 @@ public class ArtificialSAMUtils { * @param numberOfChromosomes the number of chromosomes to create * @param startingChromosome the starting number for the chromosome (most likely set to 1) * @param chromosomeSize the length of each chromosome - * * @return */ - public static SAMFileHeader createArtificialSamHeader( int numberOfChromosomes, int startingChromosome, int chromosomeSize ) { + public static SAMFileHeader createArtificialSamHeader(int numberOfChromosomes, int startingChromosome, int chromosomeSize) { SAMFileHeader header = new SAMFileHeader(); header.setSortOrder(net.sf.samtools.SAMFileHeader.SortOrder.coordinate); SAMSequenceDictionary dict = new SAMSequenceDictionary(); // make up some sequence records for (int x = startingChromosome; x < startingChromosome + numberOfChromosomes; x++) { - SAMSequenceRecord rec = new SAMSequenceRecord("chr" + ( x ), chromosomeSize /* size */); + SAMSequenceRecord rec = new SAMSequenceRecord("chr" + (x), chromosomeSize /* size */); rec.setSequenceLength(chromosomeSize); dict.addSequence(rec); } @@ -95,10 +94,9 @@ public class ArtificialSAMUtils { * @param header the header to set * @param readGroupID the read group ID tag * @param sampleName the sample name - * * @return the adjusted SAMFileHeader */ - public static SAMFileHeader createDefaultReadGroup( SAMFileHeader header, String readGroupID, String sampleName ) { + public static SAMFileHeader createDefaultReadGroup(SAMFileHeader header, String readGroupID, String sampleName) { SAMReadGroupRecord rec = new SAMReadGroupRecord(readGroupID); rec.setSample(sampleName); List readGroups = new ArrayList(); @@ -113,10 +111,9 @@ public class ArtificialSAMUtils { * @param header the header to set * @param readGroupIDs the read group ID tags * @param sampleNames the sample names - * * @return the adjusted SAMFileHeader */ - public static SAMFileHeader createEnumeratedReadGroups( SAMFileHeader header, List readGroupIDs, List sampleNames ) { + public static SAMFileHeader createEnumeratedReadGroups(SAMFileHeader header, List readGroupIDs, List sampleNames) { if (readGroupIDs.size() != sampleNames.size()) { throw new ReviewedStingException("read group count and sample name count must be the same"); } @@ -137,18 +134,16 @@ public class ArtificialSAMUtils { /** * Create an artificial read based on the parameters. The cigar string will be *M, where * is the length of the read * - * * @param header the SAM header to associate the read with * @param name the name of the read * @param refIndex the reference index, i.e. what chromosome to associate it with * @param alignmentStart where to start the alignment * @param length the length of the read - * * @return the artificial read */ public static GATKSAMRecord createArtificialRead(SAMFileHeader header, String name, int refIndex, int alignmentStart, int length) { - if( (refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart != SAMRecord.NO_ALIGNMENT_START) || - (refIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart == SAMRecord.NO_ALIGNMENT_START) ) + if ((refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart != SAMRecord.NO_ALIGNMENT_START) || + (refIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart == SAMRecord.NO_ALIGNMENT_START)) throw new ReviewedStingException("Invalid alignment start for artificial read, start = " + alignmentStart); GATKSAMRecord record = new GATKSAMRecord(header); record.setReadName(name); @@ -183,10 +178,9 @@ public class ArtificialSAMUtils { * @param alignmentStart where to start the alignment * @param bases the sequence of the read * @param qual the qualities of the read - * * @return the artificial read */ - public static GATKSAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual ) { + public static GATKSAMRecord createArtificialRead(SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual) { if (bases.length != qual.length) { throw new ReviewedStingException("Passed in read string is different length then the quality array"); } @@ -210,10 +204,9 @@ public class ArtificialSAMUtils { * @param bases the sequence of the read * @param qual the qualities of the read * @param cigar the cigar string of the read - * * @return the artificial read */ - public static GATKSAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual, String cigar ) { + public static GATKSAMRecord createArtificialRead(SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual, String cigar) { GATKSAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases, qual); rec.setCigarString(cigar); return rec; @@ -221,22 +214,21 @@ public class ArtificialSAMUtils { /** * Create an artificial read with the following default parameters : - * header: - * numberOfChromosomes = 1 - * startingChromosome = 1 - * chromosomeSize = 1000000 - * read: - * name = "default_read" - * refIndex = 0 - * alignmentStart = 1 - * - * @param bases the sequence of the read - * @param qual the qualities of the read - * @param cigar the cigar string of the read + * header: + * numberOfChromosomes = 1 + * startingChromosome = 1 + * chromosomeSize = 1000000 + * read: + * name = "default_read" + * refIndex = 0 + * alignmentStart = 1 * + * @param bases the sequence of the read + * @param qual the qualities of the read + * @param cigar the cigar string of the read * @return the artificial read */ - public static GATKSAMRecord createArtificialRead( byte[] bases, byte[] qual, String cigar ) { + public static GATKSAMRecord createArtificialRead(byte[] bases, byte[] qual, String cigar) { SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000); return ArtificialSAMUtils.createArtificialRead(header, "default_read", 0, 10000, bases, qual, cigar); } @@ -253,7 +245,7 @@ public class ArtificialSAMUtils { right.setProperPairFlag(true); left.setFirstOfPairFlag(leftIsFirst); - right.setFirstOfPairFlag(! leftIsFirst); + right.setFirstOfPairFlag(!leftIsFirst); left.setReadNegativeStrandFlag(leftIsNegative); left.setMateNegativeStrandFlag(!leftIsNegative); @@ -279,11 +271,10 @@ public class ArtificialSAMUtils { * @param startingChr the chromosome (reference ID) to start from * @param endingChr the id to end with * @param readCount the number of reads per chromosome - * * @return StingSAMIterator representing the specified amount of fake data */ - public static StingSAMIterator mappedReadIterator( int startingChr, int endingChr, int readCount ) { - SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); + public static StingSAMIterator mappedReadIterator(int startingChr, int endingChr, int readCount) { + SAMFileHeader header = createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, 0, header); } @@ -295,11 +286,10 @@ public class ArtificialSAMUtils { * @param endingChr the id to end with * @param readCount the number of reads per chromosome * @param unmappedReadCount the count of unmapped reads to place at the end of the iterator, like in a sorted bam file - * * @return StingSAMIterator representing the specified amount of fake data */ - public static StingSAMIterator mappedAndUnmappedReadIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount ) { - SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); + public static StingSAMIterator mappedAndUnmappedReadIterator(int startingChr, int endingChr, int readCount, int unmappedReadCount) { + SAMFileHeader header = createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header); } @@ -310,11 +300,10 @@ public class ArtificialSAMUtils { * @param startingChr the chromosome (reference ID) to start from * @param endingChr the id to end with * @param readCount the number of reads per chromosome - * * @return StingSAMIterator representing the specified amount of fake data */ - public static ArtificialSAMQueryIterator queryReadIterator( int startingChr, int endingChr, int readCount ) { - SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); + public static ArtificialSAMQueryIterator queryReadIterator(int startingChr, int endingChr, int readCount) { + SAMFileHeader header = createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, 0, header); } @@ -326,11 +315,10 @@ public class ArtificialSAMUtils { * @param endingChr the id to end with * @param readCount the number of reads per chromosome * @param unmappedReadCount the count of unmapped reads to place at the end of the iterator, like in a sorted bam file - * * @return StingSAMIterator representing the specified amount of fake data */ - public static StingSAMIterator queryReadIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount ) { - SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); + public static StingSAMIterator queryReadIterator(int startingChr, int endingChr, int readCount, int unmappedReadCount) { + SAMFileHeader header = createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header); } @@ -345,6 +333,7 @@ public class ArtificialSAMUtils { * reads created that have readLen bases. Pairs are sampled from a gaussian distribution with mean insert * size of insertSize and variation of insertSize / 10. The first read will be in the pileup, and the second * may be, depending on where this sampled insertSize puts it. + * * @param header * @param loc * @param readLen @@ -360,22 +349,22 @@ public class ArtificialSAMUtils { final int pos = loc.getStart(); final List pileupElements = new ArrayList(); - for ( int i = 0; i < pileupSize / 2; i++ ) { + for (int i = 0; i < pileupSize / 2; i++) { final String readName = "read" + i; final int leftStart = ranIntInclusive(ran, 1, pos); - final int fragmentSize = (int)(ran.nextGaussian() * insertSizeVariation + insertSize); + final int fragmentSize = (int) (ran.nextGaussian() * insertSizeVariation + insertSize); final int rightStart = leftStart + fragmentSize - readLen; - if ( rightStart <= 0 ) continue; + if (rightStart <= 0) continue; List pair = createPair(header, readName, readLen, leftStart, rightStart, leftIsFirst, leftIsNegative); final GATKSAMRecord left = pair.get(0); final GATKSAMRecord right = pair.get(1); - pileupElements.add(new PileupElement(left, pos - leftStart)); + pileupElements.add(new PileupElement(left, pos - leftStart, false)); - if ( pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd() ) { - pileupElements.add(new PileupElement(right, pos - rightStart)); + if (pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd()) { + pileupElements.add(new PileupElement(right, pos - rightStart, false)); } } diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index 61829dcfc..626b91cbf 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -1,13 +1,20 @@ package org.broadinstitute.sting; -import org.apache.log4j.*; +import org.apache.log4j.AppenderSkeleton; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.PatternLayout; import org.apache.log4j.spi.LoggingEvent; import org.broadinstitute.sting.commandline.CommandLineUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.io.IOUtils; -import java.io.*; -import java.util.*; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; /** * diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 5cdf12f1b..e9b4fc211 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("d61c7055bd09024abb8902bde6bd3960")); + Arrays.asList("653172b43b19003d9f7df6dab21f4b09")); executeTest("test MultiSample Pilot1", spec); } @@ -227,7 +227,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("b11df6587e4e16cb819d76a900446946")); + Arrays.asList("bd9d3d50a1f49605d7cd592a0f446899")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -255,7 +255,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("59068bc8888ad5f08790946066d76602")); + Arrays.asList("91cd6d2e3972b0b8e4064bb35a33241f")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -294,7 +294,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec( baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation + "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1, - Arrays.asList("fcd590a55f5fec2a9b7e628187d6b8a8")); + Arrays.asList("877de5b0cc61dc54636062df6399b978")); executeTest("test MultiSample Phase1 indels with complicated records", spec4); } diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java index 367f6294d..1a8086a1b 100755 --- a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java @@ -42,12 +42,12 @@ public class ReadUtilsUnitTest extends BaseTest { @Test public void testReducedReadPileupElement() { - PileupElement readp = new PileupElement(read, 0); - PileupElement reducedreadp = new PileupElement(reducedRead, 0); + PileupElement readp = new PileupElement(read, 0, false); + PileupElement reducedreadp = new PileupElement(reducedRead, 0, false); - Assert.assertFalse(readp.isReducedRead()); + Assert.assertFalse(readp.getRead().isReducedRead()); - Assert.assertTrue(reducedreadp.isReducedRead()); + Assert.assertTrue(reducedreadp.getRead().isReducedRead()); Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]); Assert.assertEquals(reducedreadp.getQual(), readp.getQual()); }