package org.broadinstitute.sting.utils.pileup; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.iterators.IterableIterator; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ExtendedPileupElement; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.BaseUtils; import java.util.List; import java.util.ArrayList; import java.util.Iterator; import net.sf.samtools.SAMRecord; /** * Version two file implementing pileups of bases in reads at a locus. * * @author Mark DePristo */ public class ReadBackedPileup implements Iterable { private GenomeLoc loc = null; private char ref = 0; private ArrayList pileup = null; public ReadBackedPileup(char ref, AlignmentContext context ) { this(context.getLocation(), ref, context.getReads(), context.getOffsets()); } public ReadBackedPileup(GenomeLoc loc, char ref, List reads, List offsets ) { this(loc, ref, readsOffsets2Pileup(reads, offsets)); } public ReadBackedPileup(GenomeLoc loc, char ref, ArrayList pileup ) { if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup2"); if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup2"); this.loc = loc; this.ref = ref; this.pileup = pileup; } private static ArrayList readsOffsets2Pileup(List reads, List offsets ) { if ( reads == null ) throw new StingException("Illegal null read list in ReadBackedPileup2"); if ( offsets == null ) throw new StingException("Illegal null offsets list in ReadBackedPileup2"); if ( reads.size() != offsets.size() ) throw new StingException("Reads and offset lists have different sizes!"); ArrayList pileup = new ArrayList(reads.size()); for ( int i = 0; i < reads.size(); i++ ) { pileup.add( new PileupElement( reads.get(i), offsets.get(i) ) ); } return pileup; } // // iterators // public Iterator iterator() { return pileup.iterator(); } // todo -- reimplement efficiently public IterableIterator extendedForeachIterator() { ArrayList x = new ArrayList(size()); int i = 0; for ( PileupElement pile : this ) { x.add(new ExtendedPileupElement(pile.getRead(), pile.getOffset(), i++, this)); } return new IterableIterator(x.iterator()); } public ReadBackedPileup getPileupWithoutDeletions() { // todo -- fixme if ( getNumberOfDeletions() > 0 ) { // todo -- remember number of deletions List newReads = new ArrayList(); List newOffsets = new ArrayList(); for ( int i = 0; i < size(); i++ ) { if ( getOffsets().get(i) != -1 ) { newReads.add(getReads().get(i)); newOffsets.add(getOffsets().get(i)); } } return new ReadBackedPileup(loc, ref, newReads, newOffsets); } else { return this; } } public int getNumberOfDeletions() { int n = 0; for ( int i = 0; i < size(); i++ ) { if ( getOffsets().get(i) != -1 ) { n++; } } return n; } // todo -- optimize me public int size() { return pileup.size(); } public GenomeLoc getLocation() { return loc; } public char getRef() { return ref; } public List getReads() { List reads = new ArrayList(size()); for ( PileupElement pile : this ) { reads.add(pile.getRead()); } return reads; } public List getOffsets() { List offsets = new ArrayList(size()); for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); } return offsets; } public byte[] getBases() { byte[] v = new byte[size()]; for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getBase(); } return v; } public byte[] getSecondaryBases() { byte[] v = new byte[size()]; for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getSecondBase(); } return v; } public byte[] getQuals() { byte[] v = new byte[size()]; for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getQual(); } return v; } public int[] getBaseCounts() { int[] counts = new int[4]; for ( PileupElement pile : this ) { // skip deletion sites if ( ! pile.isDeletion() ) { char base = Character.toUpperCase((char)(pile.getBase())); if (BaseUtils.simpleBaseToBaseIndex(base) == -1) continue; counts[BaseUtils.simpleBaseToBaseIndex(base)]++; } } return counts; } public boolean hasSecondaryBases() { for ( PileupElement pile : this ) { // skip deletion sites if ( ! pile.isDeletion() && BaseUtils.isRegularBase((char)pile.getSecondBase()) ) return true; } return false; } public String getPileupString(boolean qualsAsInts) { // In the pileup format, each line represents a genomic position, consisting of chromosome name, // coordinate, reference base, read bases, read qualities and alignment mapping qualities. //return String.format("%s %s %s %s", getLocation(), getRef(), getBases(), getQuals()); return String.format("%s %s %s %s", getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate getRef(), // reference base new String(getBases())); } }