/* * Copyright (c) 2010, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ package org.broadinstitute.sting.utils.pileup; import org.broadinstitute.sting.utils.exceptions.GATKException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; import java.util.*; import net.sf.samtools.SAMRecord; public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup implements ReadBackedExtendedEventPileup { private int nInsertions; private int maxDeletionLength; // cached value of the length of the longest deletion observed at the site public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, List pileupElements) { super(loc,pileupElements); } public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, PileupElementTracker tracker) { super(loc,tracker); } /** * Optimization of above constructor where all of the cached data is provided * @param loc * @param pileup */ public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, List pileup, int size, int maxDeletionLength, int nInsertions, int nDeletions, int nMQ0Reads) { super(loc,pileup,size,nDeletions,nMQ0Reads); this.maxDeletionLength = maxDeletionLength; this.nInsertions = nInsertions; } public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, Map pileupElementsBySample) { super(loc,pileupElementsBySample); } /** * Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront, * so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling * sizes, nDeletion, etc. over and over potentially. */ @Override protected void calculateCachedData() { super.calculateCachedData(); nInsertions = 0; nMQ0Reads = 0; for ( ExtendedEventPileupElement p : this.toExtendedIterable() ) { if ( p.isDeletion() ) { maxDeletionLength = Math.max(maxDeletionLength, p.getEventLength()); } else { if ( p.isInsertion() ) nInsertions++; } } } @Override protected void addPileupToCumulativeStats(AbstractReadBackedPileup pileup) { super.addPileupToCumulativeStats(pileup); ReadBackedExtendedEventPileup extendedEventPileup = ((ReadBackedExtendedEventPileup)pileup); this.nInsertions += extendedEventPileup.getNumberOfInsertions(); this.maxDeletionLength += extendedEventPileup.getMaxDeletionLength(); } @Override protected ReadBackedExtendedEventPileupImpl createNewPileup(GenomeLoc loc, PileupElementTracker tracker) { return new ReadBackedExtendedEventPileupImpl(loc,tracker); } @Override protected ExtendedEventPileupElement createNewPileupElement(SAMRecord read, int offset) { throw new UnsupportedOperationException("Not enough information provided to create a new pileup element"); } /** * Returns the number of insertion events in this pileup * * @return */ @Override public int getNumberOfInsertions() { return nInsertions; } /** Returns the length of the longest deletion observed at the site this * pileup is associated with (NOTE: by convention, both insertions and deletions * are associated with genomic location immediately before the actual event). If * there are no deletions at the site, returns 0. * @return */ @Override public int getMaxDeletionLength() { return maxDeletionLength; } public Iterable toExtendedIterable() { return new Iterable() { public Iterator iterator() { return pileupElementTracker.iterator(); } }; } /** * Returns an array of the events in this pileup ('I', 'D', or '.'). Note this call costs O(n) and allocates fresh array each time * @return */ @Override public byte[] getEvents() { byte[] v = new byte[size()]; int i = 0; for ( ExtendedEventPileupElement e : this.toExtendedIterable() ) { switch ( e.getType() ) { case INSERTION: v[i] = 'I'; break; case DELETION: v[i] = 'D'; break; case NOEVENT: v[i] = '.'; break; default: throw new GATKException("Unknown event type encountered: "+e.getType()); } i++; } return v; } /** A shortcut for getEventStringsWithCounts(null); * * @return */ @Override public List> getEventStringsWithCounts() { return getEventStringsWithCounts(null); } @Override public String getShortPileupString() { // In the pileup format, each extended event line has genomic position (chromosome name and offset), // reference "base" (always set to "E" for E(xtended)), then 'I','D', or '.' symbol for each read representing // insertion, deletion or no-event, respectively. return String.format("%s %s E %s", getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate new String(getEvents()) ); } /** Returns String representation of all distinct extended events (indels) at the site along with * observation counts (numbers of reads) for each distinct event. If refBases is null, a simple string representation for * deletions will be generated as "D" (i.e. "5D"); if the reference bases are provided, the actual * deleted sequence will be used in the string representation (e.g. "-AAC"). * @param refBases reference bases, starting with the current locus (i.e. the one immediately before the indel), and * extending far enough to accomodate the longest deletion (i.e. size of refBases must be at least 1+) * @return list of distinct events; first element of a pair is a string representation of the event, second element * gives the number of reads, in which that event was observed */ @Override public List> getEventStringsWithCounts(byte[] refBases) { Map events = new HashMap(); for ( ExtendedEventPileupElement e : this.toExtendedIterable() ) { Integer cnt; String indel = null; switch ( e.getType() ) { case INSERTION: indel = "+"+e.getEventBases(); break; case DELETION: indel = getDeletionString(e.getEventLength(),refBases); break; case NOEVENT: continue; default: throw new GATKException("Unknown event type encountered: "+e.getType()); } cnt = events.get(indel); if ( cnt == null ) events.put(indel,1); else events.put(indel,cnt.intValue()+1); } List> eventList = new ArrayList>(events.size()); for ( Map.Entry m : events.entrySet() ) { eventList.add( new Pair(m.getKey(),m.getValue())); } return eventList; } /** * Builds string representation of the deletion event. If refBases is null, the representation will be * "D" (e.g. "5D"); if the reference bases are provided, a verbose representation (e.g. "-AAC") * will be generated. NOTE: refBases must start with the base prior to the actual deletion (i.e. deleted * base(s) are refBase[1], refBase[2], ...), and the length of the passed array must be sufficient to accomodate the * deletion length (i.e. size of refBase must be at least length+1). * @param length * @param refBases * @return */ private String getDeletionString(int length, byte[] refBases) { if ( refBases == null ) { return Integer.toString(length)+"D"; // if we do not have reference bases, we can only report something like "5D" } else { return "-"+new String(refBases,1,length).toUpperCase(); } } }