A refactoring / unification of ReadBackedPileup and ReadBackedExtendedEventPileup.

Provides a cleaner interface with extended events inheriting all of the basic RBP
functionality.  Implementation is still slightly messy, but should allow users to
provide separate implementations of methods for sample split pileups and unsplit
pileups for efficiency's sake.
Methods not yet covered by unit/integration tests should be considered insufficiently tested.
Unit tests will follow this week.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3597 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-06-20 04:42:26 +00:00
parent 57a13805da
commit f18ac069e2
26 changed files with 1205 additions and 1873 deletions

View File

@ -30,7 +30,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
import java.util.*;
@ -81,7 +81,7 @@ public class AlignmentContext {
if ( skippedBases < 0 ) throw new StingException("BUG: skippedBases is -1 in Alignment context");
this.loc = loc;
this.basePileup = new UnifiedReadBackedPileup(loc, reads, offsets);
this.basePileup = new ReadBackedPileupImpl(loc, reads, offsets);
this.skippedBases = skippedBases;
}

View File

@ -38,7 +38,7 @@ import java.util.*;
* User: ebanks
* Modified: chartl (split by read group)
*/
public class StratifiedAlignmentContext {
public class StratifiedAlignmentContext<PE extends PileupElement> {
// Definitions:
// COMPLETE = full alignment context
@ -55,7 +55,7 @@ public class StratifiedAlignmentContext {
// private ArrayList<SAMRecord>[] reads = new ArrayList[StratifiedContextType.values().length];
// private ArrayList<Integer>[] offsets = new ArrayList[StratifiedContextType.values().length];
private ArrayList<PileupElement>[] pileupElems = new ArrayList[StratifiedContextType.values().length];
private ArrayList<PE>[] pileupElems = new ArrayList[StratifiedContextType.values().length];
//
// accessors
//
@ -63,7 +63,7 @@ public class StratifiedAlignmentContext {
// public ArrayList<SAMRecord> getReads(StratifiedContextType type) { return reads[type.ordinal()]; }
// public ArrayList<Integer> getOffsets(StratifiedContextType type) { return offsets[type.ordinal()]; }
public ArrayList<PileupElement> getPileupElements(StratifiedContextType type) {
public ArrayList<PE> getPileupElements(StratifiedContextType type) {
return pileupElems[type.ordinal()];
}
@ -81,8 +81,8 @@ public class StratifiedAlignmentContext {
this.loc = loc;
this.isExtended = isExtended;
for ( int i = 0; i < StratifiedContextType.values().length; i++) {
if ( isExtended ) pileupElems[i] = new ArrayList<PileupElement>();
else pileupElems[i] = new ArrayList<PileupElement>();
if ( isExtended ) pileupElems[i] = new ArrayList<PE>();
else pileupElems[i] = new ArrayList<PE>();
}
}
@ -90,9 +90,9 @@ public class StratifiedAlignmentContext {
int index = type.ordinal();
if ( contexts[index] == null ) {
if ( isExtended ) {
contexts[index] = new AlignmentContext(loc , new UnifiedReadBackedExtendedEventPileup(loc, (ArrayList<ExtendedEventPileupElement>)((ArrayList<? extends PileupElement>)getPileupElements(type))));
contexts[index] = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc,(List<ExtendedEventPileupElement>)getPileupElements(type)));
} else {
contexts[index] = new AlignmentContext(loc, new UnifiedReadBackedPileup(loc, getPileupElements(type)));
contexts[index] = new AlignmentContext(loc, new ReadBackedPileupImpl(loc,(List<PileupElement>)getPileupElements(type)));
}
}
return contexts[index];
@ -101,14 +101,14 @@ public class StratifiedAlignmentContext {
public void add(SAMRecord read, int offset) {
if ( isExtended ) throw new StingException("Can not add read/offset without event type specified to the context holding extended events");
if ( read.getReadNegativeStrandFlag() ) {
pileupElems[StratifiedContextType.REVERSE.ordinal()].add(new PileupElement(read,offset));
pileupElems[StratifiedContextType.REVERSE.ordinal()].add((PE)new PileupElement(read,offset));
} else {
pileupElems[StratifiedContextType.FORWARD.ordinal()].add(new PileupElement(read,offset));
pileupElems[StratifiedContextType.FORWARD.ordinal()].add((PE)new PileupElement(read,offset));
}
pileupElems[StratifiedContextType.COMPLETE.ordinal()].add(new PileupElement(read,offset));
pileupElems[StratifiedContextType.COMPLETE.ordinal()].add((PE)new PileupElement(read,offset));
}
public void add(PileupElement p) {
public void add(PE p) {
// if ( isExtended ) throw new StingException("Can not add simple pileup element to the context holding extended events");
SAMRecord read = p.getRead();
if ( read.getReadNegativeStrandFlag() ) {
@ -122,11 +122,11 @@ public class StratifiedAlignmentContext {
public void add(SAMRecord read, int offset, int length, byte [] bases) {
if ( ! isExtended ) throw new StingException("Can not add read/offset with event type specified to the context holding simple events");
if ( read.getReadNegativeStrandFlag() ) {
pileupElems[StratifiedContextType.REVERSE.ordinal()].add(new ExtendedEventPileupElement(read,offset,length,bases));
pileupElems[StratifiedContextType.REVERSE.ordinal()].add((PE)new ExtendedEventPileupElement(read,offset,length,bases));
} else {
pileupElems[StratifiedContextType.FORWARD.ordinal()].add(new ExtendedEventPileupElement(read,offset,length,bases));
pileupElems[StratifiedContextType.FORWARD.ordinal()].add((PE)new ExtendedEventPileupElement(read,offset,length,bases));
}
pileupElems[StratifiedContextType.COMPLETE.ordinal()].add(new ExtendedEventPileupElement(read,offset,length,bases));
pileupElems[StratifiedContextType.COMPLETE.ordinal()].add((PE)new ExtendedEventPileupElement(read,offset,length,bases));
}
// public void add(ExtendedEventPileupElement p) {
@ -276,8 +276,8 @@ public class StratifiedAlignmentContext {
return contexts;
}
public static AlignmentContext joinContexts(Collection<StratifiedAlignmentContext> contexts, StratifiedContextType type) {
ArrayList<PileupElement> pe = new ArrayList();
public static <PE> AlignmentContext joinContexts(Collection<StratifiedAlignmentContext> contexts, StratifiedContextType type) {
ArrayList<PE> pe = new ArrayList<PE>();
if ( contexts.size() == 0 )
throw new StingException("BUG: joinContexts requires at least one context to join");
@ -300,7 +300,7 @@ public class StratifiedAlignmentContext {
// dirty trick below. generics do not allow to cast pe (ArrayList<PileupElement>) directly to ArrayList<ExtendedEventPileupElement>,
// so we first cast to "? extends" wildcard, then to what we actually need.
if ( isExtended ) return new AlignmentContext(loc, new UnifiedReadBackedExtendedEventPileup(loc, (ArrayList< ExtendedEventPileupElement>)((ArrayList<? extends PileupElement>)pe)) );
else return new AlignmentContext(loc, new UnifiedReadBackedPileup(loc,pe));
if ( isExtended ) return new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc,(List<ExtendedEventPileupElement>)pe) );
else return new AlignmentContext(loc, new ReadBackedPileupImpl(loc,(List<PileupElement>)pe));
}
}

View File

@ -33,8 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.MergingIterator;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
import java.util.*;
@ -137,7 +136,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
// calculate the number of skipped bases, and update lastLoc so we can do that again in the next()
long skippedBases = getSkippedBases( rodSite );
lastLoc = site;
return new AlignmentContext(site, new UnifiedReadBackedPileup(site), skippedBases);
return new AlignmentContext(site, new ReadBackedPileupImpl(site), skippedBases);
}
public LocusOverflowTracker getLocusOverflowTracker() {

View File

@ -359,7 +359,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
// In this case, the subsequent call to next() will emit the normal pileup at the current base
// and shift the position.
if (readInfo.generateExtendedEvents() && hasExtendedEvents) {
Map<String,ReadBackedExtendedEventPileup> fullExtendedEventPileup = new HashMap<String,ReadBackedExtendedEventPileup>();
Map<String,AbstractReadBackedPileup<?,ExtendedEventPileupElement>> fullExtendedEventPileup = new HashMap<String,AbstractReadBackedPileup<?,ExtendedEventPileupElement>>();
SAMRecordState our1stState = readStates.getFirst();
// get current location on the reference and decrement it by 1: the indels we just stepped over
@ -412,16 +412,16 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
nMQ0Reads++;
}
// TODO: sample split!
if( indelPile.size() != 0 ) fullExtendedEventPileup.put(sampleName,new UnifiedReadBackedExtendedEventPileup(loc,indelPile,size,maxDeletionLength,nDeletions,nInsertions,nMQ0Reads));
if( indelPile.size() != 0 ) fullExtendedEventPileup.put(sampleName,new ReadBackedExtendedEventPileupImpl(loc,indelPile,size,maxDeletionLength,nDeletions,nInsertions,nMQ0Reads));
}
}
hasExtendedEvents = false; // we are done with extended events prior to current ref base
// System.out.println("Indel(s) at "+loc);
// for ( ExtendedEventPileupElement pe : indelPile ) { if ( pe.isIndel() ) System.out.println(" "+pe.toString()); }
nextAlignmentContext = new AlignmentContext(loc, new SampleSplitReadBackedExtendedEventPileup(loc, fullExtendedEventPileup));
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup));
} else {
GenomeLoc location = getLocation();
Map<String,ReadBackedPileup> fullPileup = new HashMap<String,ReadBackedPileup>();
Map<String,AbstractReadBackedPileup<?,PileupElement>> fullPileup = new HashMap<String,AbstractReadBackedPileup<?,PileupElement>>();
// todo -- performance problem -- should be lazy, really
for(String sampleName: sampleNames) {
@ -456,12 +456,12 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
nMQ0Reads++;
}
}
if( pile.size() != 0 ) fullPileup.put(sampleName,new UnifiedReadBackedPileup(location,pile,size,nDeletions,nMQ0Reads));
if( pile.size() != 0 ) fullPileup.put(sampleName,new ReadBackedPileupImpl(location,pile,size,nDeletions,nMQ0Reads));
}
updateReadStates(); // critical - must be called after we get the current state offsets and location
// if we got reads with non-D/N over the current position, we are done
if ( !fullPileup.isEmpty() ) nextAlignmentContext = new AlignmentContext(location, new SampleSplitReadBackedPileup(location, fullPileup));
if ( !fullPileup.isEmpty() ) nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location,fullPileup));
}
}
}

View File

@ -396,7 +396,7 @@ public class LocusIteratorByState extends LocusIterator {
GenomeLoc loc = GenomeLocParser.incPos(our1stState.getLocation(),-1);
// System.out.println("Indel(s) at "+loc);
// for ( ExtendedEventPileupElement pe : indelPile ) { if ( pe.isIndel() ) System.out.println(" "+pe.toString()); }
nextAlignmentContext = new AlignmentContext(loc, new UnifiedReadBackedExtendedEventPileup(loc, indelPile, size, maxDeletionLength, nInsertions, nDeletions, nMQ0Reads));
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, indelPile, size, maxDeletionLength, nInsertions, nDeletions, nMQ0Reads));
} else {
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
@ -427,7 +427,7 @@ public class LocusIteratorByState extends LocusIterator {
GenomeLoc loc = getLocation();
updateReadStates(); // critical - must be called after we get the current state offsets and location
// if we got reads with non-D/N over the current position, we are done
if ( pile.size() != 0 ) nextAlignmentContext = new AlignmentContext(loc, new UnifiedReadBackedPileup(loc, pile, size, nDeletions, nMQ0Reads));
if ( pile.size() != 0 ) nextAlignmentContext = new AlignmentContext(loc, new ReadBackedPileupImpl(loc, pile, size, nDeletions, nMQ0Reads));
}
}
}

View File

@ -36,8 +36,7 @@ import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import java.util.*;
@ -184,7 +183,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
if ( DEBUG ) logger.debug(String.format("*** TraverseDuplicates.traverse at %s with %d read sets", site, readSets.size()));
// Jump forward in the reference to this locus location
AlignmentContext locus = new AlignmentContext(site, new UnifiedReadBackedPileup(site));
AlignmentContext locus = new AlignmentContext(site, new ReadBackedPileupImpl(site));
// update the number of duplicate sets we've seen
TraversalStatistics.nRecords++;

View File

@ -11,8 +11,7 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
/**
* A simple solution to iterating over all reference positions over a series of genomic locations.
@ -93,7 +92,7 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
long nSkipped = rodLocusView.getLastSkippedBases();
if ( nSkipped > 0 ) {
GenomeLoc site = rodLocusView.getLocOneBeyondShard();
AlignmentContext ac = new AlignmentContext(site, new UnifiedReadBackedPileup(site), nSkipped);
AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileupImpl(site), nSkipped);
M x = walker.map(null, null, ac);
sum = walker.reduce(x, sum);
}

View File

@ -68,7 +68,7 @@ public class DepthPerAlleleBySample implements GenotypeAnnotation, ExperimentalA
countsBySize.put(al.length(),0);
}
for ( ExtendedEventPileupElement e : pileup ) {
for ( ExtendedEventPileupElement e : pileup.toExtendedIterable() ) {
if ( countsBySize.keySet().contains(e.getEventLength()) ) { // if proper length
if ( e.isDeletion() && vc.isDeletion() || e.isInsertion() && vc.isInsertion() ) {
countsBySize.put(e.getEventLength(),countsBySize.get(e.getEventLength())+1);

View File

@ -34,10 +34,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ExtendedPileupElement;
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.*;
import java.util.*;
import net.sf.samtools.SAMRecord;
@ -266,7 +263,7 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
}
}
return new UnifiedReadBackedPileup(pileup.getLocation(), reads, offsets);
return new ReadBackedPileupImpl(pileup.getLocation(), reads, offsets);
}
private static ReadBackedPileup getPileupOfAllele( Allele allele, ReadBackedPileup pileup ) {
@ -279,7 +276,7 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
}
}
return new UnifiedReadBackedPileup(pileup.getLocation(), filteredPileup);
return new ReadBackedPileupImpl(pileup.getLocation(), filteredPileup);
}
public List<String> getKeyNames() { return Arrays.asList("HaplotypeScore"); }

View File

@ -38,7 +38,7 @@ import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broad.tribble.vcf.VCFHeaderLine;
import java.util.*;
@ -159,7 +159,7 @@ public class BatchedCallsMerger extends LocusWalker<VariantContext, Integer> imp
if ( readGroup != null && samples.contains(readGroup.getSample()) )
newPileup.add(p);
}
return new AlignmentContext(pileup.getLocation(), new UnifiedReadBackedPileup(pileup.getLocation(), newPileup));
return new AlignmentContext(pileup.getLocation(), new ReadBackedPileupImpl(pileup.getLocation(), newPileup));
}

View File

@ -238,19 +238,19 @@ public class UnifiedGenotyperEngine {
AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES )
filteredPileup.add(p);
}
return new UnifiedReadBackedPileup(pileup.getLocation(), filteredPileup);
return new ReadBackedPileupImpl(pileup.getLocation(), filteredPileup);
}
// filter based on maximum mismatches and bad mates
private ReadBackedExtendedEventPileup filterPileup(ReadBackedExtendedEventPileup pileup, ReferenceContext refContext) {
ArrayList<ExtendedEventPileupElement> filteredPileup = new ArrayList<ExtendedEventPileupElement>();
for ( ExtendedEventPileupElement p : pileup ) {
for ( ExtendedEventPileupElement p : pileup.toExtendedIterable() ) {
if ( (UAC.USE_BADLY_MATED_READS || !p.getRead().getReadPairedFlag() || p.getRead().getMateUnmappedFlag() || p.getRead().getMateReferenceIndex() == p.getRead().getReferenceIndex()) &&
AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES )
filteredPileup.add(p);
}
return new UnifiedReadBackedExtendedEventPileup(pileup.getLocation(), filteredPileup);
return new ReadBackedExtendedEventPileupImpl(pileup.getLocation(), filteredPileup);
}
private static boolean isValidDeletionFraction(double d) {

View File

@ -91,7 +91,7 @@ public class RealignerTargetCreator extends LocusWalker<RealignerTargetCreator.E
if ( pileup.getNumberOfInsertions() > 0 ) {
hasIndel = hasInsertion = true;
// check the ends of the reads to see how far they extend
for (ExtendedEventPileupElement p : pileup )
for (ExtendedEventPileupElement p : pileup.toExtendedIterable() )
furthestStopPos = Math.max(furthestStopPos, p.getRead().getAlignmentEnd());
}
}

View File

@ -86,7 +86,7 @@ public class IndelErrorRateWalker extends LocusWalker<Integer,Integer> {
}
private void countIndels(ReadBackedExtendedEventPileup p) {
for ( ExtendedEventPileupElement pe : p ) {
for ( ExtendedEventPileupElement pe : p.toExtendedIterable() ) {
if ( ! pe.isIndel() ) continue;
if ( pe.getEventLength() > MAX_LENGTH ) continue;
if ( pe.isInsertion() ) insCounts[pe.getEventLength()-1]++;

View File

@ -529,7 +529,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
return violation;
}
private Double getAlleleProportion(Allele a, StratifiedAlignmentContext context) {
private Double getAlleleProportion(Allele a, StratifiedAlignmentContext<PileupElement> context) {
int numParental = 0;
int total = 0;
if ( context != null ) {

View File

@ -30,7 +30,7 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import java.util.List;
import java.util.Arrays;
@ -115,7 +115,7 @@ public class DupUtils {
private static Pair<Byte, Byte> combineBaseProbs(List<SAMRecord> duplicates, int readOffset, int maxQScore) {
GenomeLoc loc = GenomeLocParser.createGenomeLoc(duplicates.get(0));
ReadBackedPileup pileup = new UnifiedReadBackedPileup(loc, duplicates, readOffset);
ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, duplicates, readOffset);
final boolean debug = false;

View File

@ -0,0 +1,709 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.gatk.iterators.IterableIterator;
import java.util.*;
import net.sf.samtools.SAMRecord;
/**
* A generic implementation of read-backed pileups.
*
* @author mhanna
* @version 0.1
*/
public abstract class AbstractReadBackedPileup<RBP extends ReadBackedPileup,PE extends PileupElement> implements ReadBackedPileup {
protected final GenomeLoc loc;
protected final PileupElementTracker<PE> pileupElementTracker;
protected int size = 0; // cached value of the size of the pileup
protected int nDeletions = 0; // cached value of the number of deletions
protected int nMQ0Reads = 0; // cached value of the number of MQ0 reads
/**
 * Create a new version of a read backed pileup at loc, using the reads and their corresponding
 * offsets. This pileup will contain a list, in order of the reads, of the piled bases at
 * reads[i] for all i in offsets. Does not make a copy of the data, so it's not safe to
 * go changing the reads.
 *
 * @param loc the genomic location of this pileup
 * @param reads the reads backing this pileup; not copied, so must not be modified afterwards
 * @param offsets per-read offsets, parallel to reads (validated in readsOffsets2Pileup)
 */
public AbstractReadBackedPileup(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets ) {
this.loc = loc;
// Null/size validation and element construction are delegated to readsOffsets2Pileup.
this.pileupElementTracker = readsOffsets2Pileup(reads,offsets);
}
/**
 * Create a new read backed pileup at loc where every read shares the same single offset.
 * Does not make a copy of the read list.
 *
 * @param loc the genomic location of this pileup
 * @param reads the reads backing this pileup; not copied
 * @param offset the single offset applied to every read; must be &gt;= 0 (checked in readsOffsets2Pileup)
 */
public AbstractReadBackedPileup(GenomeLoc loc, List<SAMRecord> reads, int offset ) {
this.loc = loc;
this.pileupElementTracker = readsOffsets2Pileup(reads,offset);
}
/**
 * Create a new version of a read backed pileup at loc without any aligned reads
 *
 * @param loc the genomic location of this (empty) pileup
 */
public AbstractReadBackedPileup(GenomeLoc loc) {
// Empty tracker: the cached size/nDeletions/nMQ0Reads counters stay at their zero defaults.
this(loc, new UnifiedPileupElementTracker<PE>());
}
/**
 * Create a new version of a read backed pileup at loc, using the reads and their corresponding
 * offsets. This lower level constructor assumes pileup is well-formed and merely keeps a
 * pointer to pileup. Don't go changing the data in pileup.
 *
 * @param loc the genomic location of this pileup; may not be null
 * @param pileup the pileup elements backing this pileup; may not be null and is not defensively copied
 */
public AbstractReadBackedPileup(GenomeLoc loc, List<PE> pileup ) {
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup");
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup");

this.loc = loc;
this.pileupElementTracker = new UnifiedPileupElementTracker<PE>(pileup);

// Walk the elements once up front to populate the cached size/deletion/MQ0 counters.
calculateCachedData();
}
/**
 * Optimization of the above constructor where all of the cached data is provided up front,
 * skipping the O(n) calculateCachedData() pass.
 *
 * @param loc the genomic location of this pileup; may not be null
 * @param pileup the pileup elements backing this pileup; may not be null and is not defensively copied
 * @param size precomputed number of elements in the pileup
 * @param nDeletions precomputed number of deletion elements in the pileup
 * @param nMQ0Reads precomputed number of elements backed by mapping-quality-zero reads
 */
public AbstractReadBackedPileup(GenomeLoc loc, List<PE> pileup, int size, int nDeletions, int nMQ0Reads ) {
    // Fix: error messages referenced the pre-refactoring class name UnifiedReadBackedPileup;
    // use the current class name so exception text points at real code.
    if ( loc == null ) throw new StingException("Illegal null genomeloc in AbstractReadBackedPileup");
    if ( pileup == null ) throw new StingException("Illegal null pileup in AbstractReadBackedPileup");

    this.loc = loc;
    this.pileupElementTracker = new UnifiedPileupElementTracker<PE>(pileup);
    this.size = size;
    this.nDeletions = nDeletions;
    this.nMQ0Reads = nMQ0Reads;
}
/**
 * Wrap an existing element tracker directly, without copying its elements. Used by the
 * filtering 'special constructors' below — presumably via subclasses' createNewPileup
 * implementations (confirm in the concrete subclasses).
 *
 * @param loc the genomic location of this pileup
 * @param tracker the (assumed well-formed) element tracker to wrap; not copied
 */
protected AbstractReadBackedPileup(GenomeLoc loc, PileupElementTracker<PE> tracker) {
this.loc = loc;
this.pileupElementTracker = tracker;
// Counters must be recomputed since the caller supplies no cached values.
calculateCachedData();
}
/**
 * Recompute the cached element count, deletion count, and MQ0-read count from the
 * current contents of the element tracker. Paying this single O(n) pass at construction
 * time is cheaper than re-deriving these values on every size()/nDeletions-style query.
 */
protected void calculateCachedData() {
    int elementCount = 0;
    int deletionCount = 0;
    int mq0Count = 0;

    for ( PileupElement element : pileupElementTracker ) {
        elementCount++;
        if ( element.isDeletion() )
            deletionCount++;
        if ( element.getRead().getMappingQuality() == 0 )
            mq0Count++;
    }

    size = elementCount;
    nDeletions = deletionCount;
    nMQ0Reads = mq0Count;
}
/**
 * Helper routine for converting parallel lists of reads and per-read offsets into a
 * PileupElementTracker, using the subclass factory createNewPileupElement for each pair.
 *
 * @param reads the reads to pile up; may not be null
 * @param offsets per-read offsets, parallel to reads; may not be null and must match reads in size
 * @return a tracker containing one element per (read, offset) pair, in input order
 */
private PileupElementTracker<PE> readsOffsets2Pileup(List<SAMRecord> reads, List<Integer> offsets ) {
    // Fix: error messages referenced the pre-refactoring class name UnifiedReadBackedPileup.
    if ( reads == null ) throw new StingException("Illegal null read list in AbstractReadBackedPileup");
    if ( offsets == null ) throw new StingException("Illegal null offsets list in AbstractReadBackedPileup");
    if ( reads.size() != offsets.size() ) throw new StingException("Reads and offset lists have different sizes!");

    UnifiedPileupElementTracker<PE> pileup = new UnifiedPileupElementTracker<PE>();
    for ( int i = 0; i < reads.size(); i++ ) {
        pileup.add(createNewPileupElement(reads.get(i), offsets.get(i)));
    }

    return pileup;
}
/**
 * Helper routine for converting a list of reads sharing a single offset into a
 * PileupElementTracker, using the subclass factory createNewPileupElement for each read.
 *
 * @param reads the reads to pile up; may not be null
 * @param offset the offset applied to every read; must be &gt;= 0
 * @return a tracker containing one element per read, in input order
 */
private PileupElementTracker<PE> readsOffsets2Pileup(List<SAMRecord> reads, int offset ) {
    // Fix: messages referenced the old class name, and the offset message was
    // ungrammatical ("Illegal offset < 0 UnifiedReadBackedPileup" — missing "in").
    if ( reads == null ) throw new StingException("Illegal null read list in AbstractReadBackedPileup");
    if ( offset < 0 ) throw new StingException("Illegal offset < 0 in AbstractReadBackedPileup");

    UnifiedPileupElementTracker<PE> pileup = new UnifiedPileupElementTracker<PE>();
    for ( int i = 0; i < reads.size(); i++ ) {
        pileup.add(createNewPileupElement( reads.get(i), offset ));
    }

    return pileup;
}
// Factory hooks implemented by concrete subclasses so that the filtering methods below
// can build result pileups and elements of the correct concrete types (RBP / PE).
protected abstract RBP createNewPileup(GenomeLoc loc, PileupElementTracker<PE> pileupElementTracker);
protected abstract PE createNewPileupElement(SAMRecord read, int offset);
// --------------------------------------------------------
//
// Special 'constructors'
//
// --------------------------------------------------------
/**
 * Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this
 * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
 * of the pileup (just returns this) if there are no deletions in the pileup.
 *
 * @return a pileup with no deletion elements; may be this exact object when there are none
 */
@Override
public RBP getPileupWithoutDeletions() {
if ( getNumberOfDeletions() > 0 ) {
// Per-sample trackers are filtered sample-by-sample so the sample partitioning survives.
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();

for(String sampleName: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sampleName);
// Recurse on a single-sample pileup, then reuse its already-filtered tracker.
// Unchecked cast: createNewPileup is assumed to return an AbstractReadBackedPileup subclass.
AbstractReadBackedPileup<RBP,PE> pileup = (AbstractReadBackedPileup<RBP,PE>)createNewPileup(loc,perSampleElements).getPileupWithoutDeletions();
filteredTracker.addElements(sampleName,pileup.pileupElementTracker);
}
return createNewPileup(loc,filteredTracker);
}
else {
UnifiedPileupElementTracker<PE> tracker = (UnifiedPileupElementTracker<PE>)pileupElementTracker;
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
// Keep only non-deletion elements; the elements themselves are shared, not copied.
for ( PE p : tracker ) {
if ( !p.isDeletion() ) {
filteredTracker.add(p);
}
}
return createNewPileup(loc, filteredTracker);
}
} else {
// No deletions present: return this pileup unchanged (unchecked cast of this to RBP).
return (RBP)this;
}
}
/**
 * Returns a new ReadBackedPileup where only one read from an overlapping read
 * pair is retained. If the two reads in question disagree to their basecall,
 * neither read is retained. If they agree on the base, the read with the higher
 * quality observation is retained.
 *
 * NOTE(review): overlap is detected purely by read name; if a name is removed on
 * disagreement and then seen a third time, the third element is re-added. Also,
 * elements come back out of a HashMap, so output order is unspecified — confirm
 * callers do not rely on element order.
 *
 * @return the newly filtered pileup
 */
@Override
public RBP getOverlappingFragmentFilteredPileup() {
// Per-sample trackers are filtered sample-by-sample so the sample partitioning survives.
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();

for(String sampleName: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sampleName);
AbstractReadBackedPileup<RBP,PE> pileup = (AbstractReadBackedPileup<RBP,PE>)createNewPileup(loc,perSampleElements).getOverlappingFragmentFilteredPileup();
filteredTracker.addElements(sampleName,pileup.pileupElementTracker);
}
return createNewPileup(loc,filteredTracker);
}
else {
// Map keyed by read name: at most one retained element per fragment.
Map<String,PE> filteredPileup = new HashMap<String, PE>();

for ( PE p : pileupElementTracker ) {
String readName = p.getRead().getReadName();

// if we've never seen this read before, life is good
if (!filteredPileup.containsKey(readName)) {
filteredPileup.put(readName, p);
} else {
PileupElement existing = filteredPileup.get(readName);

// if the reads disagree at this position, throw them both out. Otherwise
// keep the element with the higher quality score
if (existing.getBase() != p.getBase()) {
filteredPileup.remove(readName);
} else {
if (existing.getQual() < p.getQual()) {
filteredPileup.put(readName, p);
}
}
}
}

// Repackage the surviving elements into a unified tracker.
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
for(PE filteredElement: filteredPileup.values())
filteredTracker.add(filteredElement);
return createNewPileup(loc,filteredTracker);
}
}
/**
 * Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
 * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
 * of the pileup (just returns this) if there are no MQ0 reads in the pileup.
 *
 * @return a pileup with no MQ0-backed elements; may be this exact object when there are none
 */
@Override
public RBP getPileupWithoutMappingQualityZeroReads() {
if ( getNumberOfMappingQualityZeroReads() > 0 ) {
// Per-sample trackers are filtered sample-by-sample so the sample partitioning survives.
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();

for(String sampleName: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sampleName);
// Recurse on a single-sample pileup, then reuse its already-filtered tracker.
AbstractReadBackedPileup<RBP,PE> pileup = (AbstractReadBackedPileup<RBP,PE>)createNewPileup(loc,perSampleElements).getPileupWithoutMappingQualityZeroReads();
filteredTracker.addElements(sampleName,pileup.pileupElementTracker);
}
return createNewPileup(loc,filteredTracker);
}
else {
UnifiedPileupElementTracker<PE> tracker = (UnifiedPileupElementTracker<PE>)pileupElementTracker;
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
// Keep only elements whose backing read has mapping quality above zero.
for ( PE p : tracker ) {
if ( p.getRead().getMappingQuality() > 0 ) {
filteredTracker.add(p);
}
}
return createNewPileup(loc, filteredTracker);
}
} else {
// No MQ0 reads present: return this pileup unchanged (unchecked cast of this to RBP).
return (RBP)this;
}
}
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
 * reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.
 *
 * Note: deletion elements are exempt from the base-quality test (they pass whenever the
 * mapping-quality test passes), and a negative threshold effectively disables that test.
 *
 * @param minBaseQ minimum base quality an element must have (deletions exempt)
 * @param minMapQ minimum mapping quality the backing read must have
 * @return a newly allocated filtered pileup
 */
@Override
public RBP getBaseAndMappingFilteredPileup( int minBaseQ, int minMapQ ) {
// Per-sample trackers are filtered sample-by-sample so the sample partitioning survives.
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();

for(String sampleName: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sampleName);
AbstractReadBackedPileup<RBP,PE> pileup = (AbstractReadBackedPileup<RBP,PE>)createNewPileup(loc,perSampleElements).getBaseAndMappingFilteredPileup(minBaseQ,minMapQ);
filteredTracker.addElements(sampleName,pileup.pileupElementTracker);
}
return createNewPileup(loc,filteredTracker);
}
else {
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
for ( PE p : pileupElementTracker ) {
if ( p.getRead().getMappingQuality() >= minMapQ && (p.isDeletion() || p.getQual() >= minBaseQ) ) {
filteredTracker.add(p);
}
}
return createNewPileup(loc, filteredTracker);
}
}
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ.
 * This method allocates and returns a new instance of ReadBackedPileup.
 * Delegates to getBaseAndMappingFilteredPileup with minMapQ = -1, which disables the
 * mapping-quality criterion (every non-negative MQ passes).
 * @param minBaseQ minimum base quality to keep (deletions pass regardless)
 * @return a freshly allocated, base-quality-filtered pileup
 */
@Override
public RBP getBaseFilteredPileup( int minBaseQ ) {
    return getBaseAndMappingFilteredPileup(minBaseQ, -1);
}
/** Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ.
 * This method allocates and returns a new instance of ReadBackedPileup.
 * Delegates to getBaseAndMappingFilteredPileup with minBaseQ = -1, which disables the
 * base-quality criterion (every non-negative base quality passes).
 * @param minMapQ minimum read mapping quality to keep
 * @return a freshly allocated, mapping-quality-filtered pileup
 */
@Override
public RBP getMappingFilteredPileup( int minMapQ ) {
    return getBaseAndMappingFilteredPileup(-1, minMapQ);
}
/**
 * Gets the distinct sample names represented in this pileup.  For a sample-split pileup this is
 * the tracker's own sample list; otherwise it is derived by inspecting every element's read group.
 * A null entry stands for reads with no read group or no sample attached.
 *
 * @return collection of distinct sample names (may contain null)
 */
public Collection<String> getSamples() {
    if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
        return ((PerSamplePileupElementTracker<PE>)pileupElementTracker).getSamples();
    }

    Collection<String> names = new HashSet<String>();
    for(PileupElement element: this) {
        SAMRecord read = element.getRead();
        names.add(read.getReadGroup() == null ? null : read.getReadGroup().getSample());
    }
    return names;
}
/**
 * Returns a pileup randomly downsampled to the desiredCoverage: desiredCoverage distinct element
 * positions are drawn uniformly at random and only those elements are retained.  If the pileup is
 * already at or below the desired coverage, this very pileup is returned.
 *
 * @param desiredCoverage target number of elements to keep
 * @return a downsampled pileup, or this pileup if no downsampling is needed
 */
@Override
public RBP getDownsampledPileup(int desiredCoverage) {
    if ( size() <= desiredCoverage )
        return (RBP)this;

    // randomly choose numbers corresponding to positions in the reads list
    Random generator = new Random();
    TreeSet<Integer> positions = new TreeSet<Integer>();
    for ( int i = 0; i < desiredCoverage; /* no update */ ) {
        if ( positions.add(generator.nextInt(size)) )
            i++;
    }

    if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
        PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
        PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();

        int current = 0;
        for(String sampleName: tracker.getSamples()) {
            PileupElementTracker<PE> perSampleElements = tracker.getElements(sampleName);
            UnifiedPileupElementTracker<PE> filteredPileup = new UnifiedPileupElementTracker<PE>();
            for(PE p: perSampleElements) {
                if(positions.contains(current))
                    filteredPileup.add(p);
                current++;    // bug fix: advance per element; previously incremented once per sample
            }
            if(filteredPileup.size() > 0)
                // bug fix: keep only the selected elements; previously the entire
                // unfiltered per-sample tracker was re-added, so nothing was downsampled
                filteredTracker.addElements(sampleName, filteredPileup);
        }
        return createNewPileup(loc,filteredTracker);
    }
    else {
        UnifiedPileupElementTracker<PE> tracker = (UnifiedPileupElementTracker<PE>)pileupElementTracker;
        UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
        for ( int nextReadToKeep : positions )
            filteredTracker.add(tracker.get(nextReadToKeep));
        return createNewPileup(getLocation(), filteredTracker);
    }
}
/**
 * Gets the subset of this pileup belonging to the given sample.  A null sampleName selects
 * reads that have no read group or no sample attached to their read group.
 *
 * @param sampleName sample to select, or null for reads without a sample
 * @return the per-sample pileup; for an unsplit pileup, null when no element matches
 */
@Override
public RBP getPileupForSample(String sampleName) {
    if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
        PerSamplePileupElementTracker<PE> bySample = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
        return createNewPileup(loc, bySample.getElements(sampleName));
    }

    UnifiedPileupElementTracker<PE> kept = new UnifiedPileupElementTracker<PE>();
    for(PE element: pileupElementTracker) {
        SAMRecord read = element.getRead();
        boolean matches;
        if(sampleName == null) {
            matches = read.getReadGroup() == null || read.getReadGroup().getSample() == null;
        } else {
            matches = read.getReadGroup() != null && sampleName.equals(read.getReadGroup().getSample());
        }
        if(matches)
            kept.add(element);
    }
    // NOTE(review): the unsplit path returns null for an empty result, while the split path
    // always delegates to the tracker (possibly with a null element list) -- confirm callers cope.
    return kept.size() > 0 ? createNewPileup(loc, kept) : null;
}
// --------------------------------------------------------
//
// iterators
//
// --------------------------------------------------------

/**
 * The best way to access PileupElements where you only care about the bases and quals in the pileup.
 *
 * for (PileupElement p : this) { doSomething(p); }
 *
 * Provides efficient iteration of the data.
 *
 * @return a read-only iterator over this pileup's elements; remove() is unsupported
 */
@Override
public Iterator<PileupElement> iterator() {
    return new Iterator<PileupElement>() {
        // adapts the tracker's Iterator<PE> to the wider Iterator<PileupElement> interface
        private final Iterator<PE> wrappedIterator = pileupElementTracker.iterator();
        public boolean hasNext() { return wrappedIterator.hasNext(); }
        public PileupElement next() { return wrappedIterator.next(); }
        public void remove() { throw new UnsupportedOperationException("Cannot remove from a pileup element iterator"); }
    };
}
/**
 * The best way to access PileupElements when you care not only about bases and quals in the pileup
 * but also need access to the index of the pileup element in the pile.
 *
 * for (ExtendedPileupElement p : this) { doSomething(p); }
 *
 * Materializes the whole pileup into an indexed list before iterating.
 *
 * @return an iterable iterator of index-aware pileup elements
 */
// todo -- reimplement efficiently
// todo -- why is this public?
@Override
public IterableIterator<ExtendedPileupElement> extendedForeachIterator() {
    ArrayList<ExtendedPileupElement> indexed = new ArrayList<ExtendedPileupElement>(size());
    int position = 0;
    for ( PileupElement element : this ) {
        indexed.add(new ExtendedPileupElement(element.getRead(), element.getOffset(), position, this));
        position++;
    }
    return new IterableIterator<ExtendedPileupElement>(indexed.iterator());
}
/**
 * Simple useful routine to count the number of deletion bases in this pileup.
 * Returns the value cached at construction time; no traversal is performed.
 *
 * @return number of deletion elements in this pileup
 */
@Override
public int getNumberOfDeletions() {
    return nDeletions;
}
/**
 * @return the cached number of reads in this pileup with mapping quality zero
 */
@Override
public int getNumberOfMappingQualityZeroReads() {
    return nMQ0Reads;
}
/**
 * @return the number of elements in this pileup (cached; no traversal is performed)
 */
@Override
public int size() {
    return size;
}
/**
 * @return the genomic location of this pileup
 */
@Override
public GenomeLoc getLocation() {
    return loc;
}
/**
 * Somewhat expensive routine that returns true if any base in the pileup has secondary bases annotated.
 * Deletion elements are skipped; a secondary base counts only if it is a regular base per
 * BaseUtils.isRegularBase.
 *
 * @return true if at least one non-deletion element carries a regular secondary base
 */
@Override
public boolean hasSecondaryBases() {
    if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
        PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
        for(String sampleName: tracker.getSamples()) {
            // short-circuit: once one sample has a secondary base, the answer cannot change
            // (the original |= accumulation needlessly scanned every remaining sample)
            if(createNewPileup(loc,tracker.getElements(sampleName)).hasSecondaryBases())
                return true;
        }
    }
    else {
        for ( PileupElement pile : this ) {
            // skip deletion sites
            if ( ! pile.isDeletion() && BaseUtils.isRegularBase((char)pile.getSecondBase()) )
                return true;
        }
    }
    return false;
}
/**
 * Get counts of A, C, G, T in order, which returns a int[4] vector with counts according
 * to BaseUtils.simpleBaseToBaseIndex for each base.  Deletion elements are skipped, as are
 * bases that map to index -1.
 *
 * @return int[4] of base counts indexed by BaseUtils.simpleBaseToBaseIndex
 */
@Override
public int[] getBaseCounts() {
    int[] totals = new int[4];

    if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
        // sum the per-sample count vectors
        PerSamplePileupElementTracker<PE> bySample = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
        for(String sample: bySample.getSamples()) {
            int[] sampleCounts = createNewPileup(loc,bySample.getElements(sample)).getBaseCounts();
            for(int i = 0; i < totals.length; i++)
                totals[i] += sampleCounts[i];
        }
        return totals;
    }

    for ( PileupElement element : this ) {
        if ( element.isDeletion() )
            continue; // skip deletion sites
        int index = BaseUtils.simpleBaseToBaseIndex((char)element.getBase());
        if (index != -1)
            totals[index]++;
    }
    return totals;
}
/**
 * Formats this pileup as a single samtools-pileup-style line: contig, position, reference base,
 * read bases, and phred+33-encoded base qualities.
 *
 * @param ref reference base to print at this locus
 * @return single pileup line (no trailing newline)
 */
@Override
public String getPileupString(Character ref) {
    // In the pileup format, each line represents a genomic position, consisting of chromosome name,
    // coordinate, reference base, read bases, read qualities and alignment mapping qualities.
    return String.format("%s %s %c %s %s",
        getLocation().getContig(), getLocation().getStart(),    // chromosome name and coordinate
        ref,                                                     // reference base
        new String(getBases()),
        getQualsString());
}
// --------------------------------------------------------
//
// Convenience functions that may be slow
//
// --------------------------------------------------------

/**
 * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates a fresh list each time.
 * @return fresh list of backing SAMRecords, in pileup order
 */
@Override
public List<SAMRecord> getReads() {
    List<SAMRecord> result = new ArrayList<SAMRecord>(size());
    for ( PileupElement element : this )
        result.add(element.getRead());
    return result;
}
/**
 * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates a fresh list each time.
 * @return fresh list of per-read offsets, in pileup order
 */
@Override
public List<Integer> getOffsets() {
    List<Integer> result = new ArrayList<Integer>(size());
    for ( PileupElement element : this )
        result.add(element.getOffset());
    return result;
}
/**
 * Returns an array of the bases in this pileup. Note this call costs O(n) and allocates a fresh array each time.
 * @return byte array of bases, in tracker iteration order
 */
@Override
public byte[] getBases() {
    byte[] bases = new byte[size()];
    int i = 0;
    for ( PileupElement element : pileupElementTracker )
        bases[i++] = element.getBase();
    return bases;
}
/**
 * Returns an array of the secondary bases in this pileup. Note this call costs O(n) and allocates a fresh array each time.
 * @return byte array of secondary bases, in tracker iteration order
 */
@Override
public byte[] getSecondaryBases() {
    byte[] secondaries = new byte[size()];
    int i = 0;
    for ( PileupElement element : pileupElementTracker )
        secondaries[i++] = element.getSecondBase();
    return secondaries;
}
/**
 * Returns an array of the quals in this pileup. Note this call costs O(n) and allocates a fresh array each time.
 * @return byte array of base qualities, in tracker iteration order
 */
@Override
public byte[] getQuals() {
    byte[] quals = new byte[size()];
    int i = 0;
    for ( PileupElement element : pileupElementTracker )
        quals[i++] = element.getQual();
    return quals;
}
/**
 * Get an array of the mapping qualities.  Costs O(n) and allocates a fresh array each time.
 * Note that mapping qualities are narrowed from int to byte.
 * @return byte array of per-read mapping qualities, in tracker iteration order
 */
@Override
public byte[] getMappingQuals() {
    byte[] mapQuals = new byte[size()];
    int i = 0;
    for ( PileupElement element : pileupElementTracker )
        mapQuals[i++] = (byte)element.getRead().getMappingQuality();
    return mapQuals;
}
/**
 * Renders an array of quality scores as a phred+33 string.  Scores are clamped to 63 before
 * encoding, so output characters stay within the printable '!'..'`' range.
 *
 * @param quals raw quality scores
 * @return printable encoding of the scores
 */
static String quals2String( byte[] quals ) {
    StringBuilder encoded = new StringBuilder(quals.length);
    for ( int qual : quals ) {
        int clamped = Math.min(qual, 63); // todo: fixme, this isn't a good idea
        encoded.append((char)(33 + clamped)); // todo: warning, this is illegal for qual > 63
    }
    return encoded.toString();
}
/** Convenience wrapper: phred+33 encoding of this pileup's base qualities. */
private String getQualsString() {
    return quals2String(getQuals());
}
}

View File

@ -27,7 +27,6 @@ package org.broadinstitute.sting.utils.pileup;
import net.sf.picard.util.PeekableIterator;
import java.util.PriorityQueue;
import java.util.Map;
import java.util.Comparator;
import java.util.Iterator;
@ -37,22 +36,15 @@ import java.util.Iterator;
* @author mhanna
* @version 0.1
*/
class MergingPileupElementIterator implements Iterator<PileupElement> {
private final PriorityQueue<PeekableIterator<? extends PileupElement>> perSampleIterators;
class MergingPileupElementIterator<PE extends PileupElement> implements Iterator<PE> {
private final PriorityQueue<PeekableIterator<PE>> perSampleIterators;
public MergingPileupElementIterator(Map<String,? extends Object> pileupsPerSample) {
perSampleIterators = new PriorityQueue<PeekableIterator<? extends PileupElement>>(pileupsPerSample.size(),new PileupElementIteratorComparator());
for(Object pileupForSample: pileupsPerSample.values()) {
if(pileupForSample instanceof ReadBackedPileup) {
ReadBackedPileup pileup = (ReadBackedPileup)pileupForSample;
if(pileup.size() != 0)
perSampleIterators.add(new PeekableIterator<PileupElement>(pileup.iterator()));
}
else if(pileupForSample instanceof ReadBackedExtendedEventPileup) {
ReadBackedExtendedEventPileup pileup = (ReadBackedExtendedEventPileup)pileupForSample;
if(pileup.size() != 0)
perSampleIterators.add(new PeekableIterator<ExtendedEventPileupElement>(pileup.iterator()));
}
/**
 * Builds a merging iterator over a sample-split tracker: one peekable iterator per
 * non-empty sample, prioritized by the offset of each iterator's next element.
 *
 * @param tracker per-sample collection of pileup elements to merge
 */
public MergingPileupElementIterator(PerSamplePileupElementTracker<PE> tracker) {
    // Math.max guard: PriorityQueue throws IllegalArgumentException for an initial capacity < 1,
    // which would happen for a tracker with no samples
    perSampleIterators = new PriorityQueue<PeekableIterator<PE>>(Math.max(1,tracker.getSamples().size()),new PileupElementIteratorComparator());
    for(String sampleName: tracker.getSamples()) {
        PileupElementTracker<PE> trackerPerSample = tracker.getElements(sampleName);
        if(trackerPerSample.size() != 0)
            // bug fix: was tracker.iterator(), which iterates the *whole* per-sample tracker
            // (recursively constructing another merging iterator) instead of this sample's elements
            perSampleIterators.add(new PeekableIterator<PE>(trackerPerSample.iterator()));
    }
}
@ -60,9 +52,9 @@ class MergingPileupElementIterator implements Iterator<PileupElement> {
return !perSampleIterators.isEmpty();
}
public PileupElement next() {
PeekableIterator<? extends PileupElement> currentIterator = perSampleIterators.remove();
PileupElement current = currentIterator.next();
public PE next() {
PeekableIterator<PE> currentIterator = perSampleIterators.remove();
PE current = currentIterator.next();
if(currentIterator.hasNext())
perSampleIterators.add(currentIterator);
return current;
@ -75,8 +67,8 @@ class MergingPileupElementIterator implements Iterator<PileupElement> {
/**
* Compares two peekable iterators consisting of pileup elements.
*/
private class PileupElementIteratorComparator implements Comparator<PeekableIterator<? extends PileupElement>> {
public int compare(PeekableIterator<? extends PileupElement> lhs, PeekableIterator<? extends PileupElement> rhs) {
/**
 * Compares two peekable iterators of pileup elements, prioritizing by the offset of the next
 * element each will produce (same ordering as the original rhs-minus-lhs subtraction).
 */
private class PileupElementIteratorComparator implements Comparator<PeekableIterator<PE>> {
    public int compare(PeekableIterator<PE> lhs, PeekableIterator<PE> rhs) {
        // Integer.compare avoids the int-overflow hazard of comparing by subtraction
        return Integer.compare(rhs.peek().getOffset(), lhs.peek().getOffset());
    }
}

View File

@ -0,0 +1,99 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.pileup;
import java.util.*;
/**
 * Base container for the pileup elements backing a read-backed pileup.  Concrete subclasses
 * store elements either as one flat collection (UnifiedPileupElementTracker) or partitioned
 * per sample (PerSamplePileupElementTracker).
 *
 * @author mhanna
 * @version 0.1
 */
abstract class PileupElementTracker<PE extends PileupElement> implements Iterable<PE> {
    // Number of elements held by this tracker.
    public abstract int size();
}
/**
 * Flat, single-list implementation of PileupElementTracker: all elements live in one list
 * regardless of sample.
 */
class UnifiedPileupElementTracker<PE extends PileupElement> extends PileupElementTracker<PE> {
    private final List<PE> pileup;

    // ArrayList rather than LinkedList: get(int) is called per kept element during random
    // downsampling, which would otherwise cost O(n) per lookup (O(n^2) overall).
    public UnifiedPileupElementTracker() { pileup = new ArrayList<PE>(); }
    public UnifiedPileupElementTracker(List<PE> pileup) { this.pileup = pileup; }

    /** Appends an element to the end of the pileup. */
    public void add(PE element) {
        pileup.add(element);
    }

    /** Random access by pileup position. */
    public PE get(int index) {
        return pileup.get(index);
    }

    public int size() {
        return pileup.size();
    }

    public Iterator<PE> iterator() { return pileup.iterator(); }
}
/**
 * Sample-partitioned implementation of PileupElementTracker: elements are stored in one
 * sub-tracker per sample, and iteration merges the per-sample trackers by offset.
 */
class PerSamplePileupElementTracker<PE extends PileupElement> extends PileupElementTracker<PE> {
    private final Map<String,PileupElementTracker<PE>> pileup;

    // Running total of elements across all samples; kept in sync by the constructors and addElements.
    private int size = 0;

    public PerSamplePileupElementTracker() {
        pileup = new HashMap<String,PileupElementTracker<PE>>();
    }

    public PerSamplePileupElementTracker(Map<String,AbstractReadBackedPileup<?,PE>> pileupsBySample) {
        pileup = new HashMap<String,PileupElementTracker<PE>>();
        for(Map.Entry<String,AbstractReadBackedPileup<?,PE>> entry: pileupsBySample.entrySet()) {
            String sampleName = entry.getKey();
            AbstractReadBackedPileup<?,PE> pileupBySample = entry.getValue();
            pileup.put(sampleName,pileupBySample.pileupElementTracker);
            // bug fix: size was never accumulated in this constructor, so size() reported 0
            // for any tracker built from a per-sample pileup map
            size += pileupBySample.pileupElementTracker.size();
        }
    }

    /**
     * Gets a list of all the samples stored in this pileup.
     * @return List of samples in this pileup.
     */
    public Collection<String> getSamples() {
        return pileup.keySet();
    }

    /** Gets the sub-tracker for one sample (null if the sample is absent). */
    public PileupElementTracker<PE> getElements(final String sample) {
        return pileup.get(sample);
    }

    /** Registers a sample's elements; assumes the sample is not already present. */
    public void addElements(final String sample, PileupElementTracker<PE> elements) {
        pileup.put(sample,elements);
        size += elements.size();
    }

    public Iterator<PE> iterator() { return new MergingPileupElementIterator<PE>(this); }

    public int size() {
        return size;
    }
}

View File

@ -39,7 +39,26 @@ import net.sf.samtools.SAMRecord;
* @author mhanna
* @version 0.1
*/
public interface ReadBackedExtendedEventPileup extends Iterable<ExtendedEventPileupElement> {
public interface ReadBackedExtendedEventPileup extends ReadBackedPileup {
/**
* Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
* of the pileup (just returns this) if there are no deletions in the pileup.
*
* @return
*/
public ReadBackedExtendedEventPileup getPileupWithoutDeletions();
/**
* Returns a new ReadBackedPileup where only one read from an overlapping read
* pair is retained. If the two reads in question disagree to their basecall,
* neither read is retained. If they agree on the base, the read with the higher
* quality observation is retained
*
* @return the newly filtered pileup
*/
public ReadBackedExtendedEventPileup getOverlappingFragmentFilteredPileup();
/**
* Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
@ -49,6 +68,26 @@ public interface ReadBackedExtendedEventPileup extends Iterable<ExtendedEventPil
*/
public ReadBackedExtendedEventPileup getPileupWithoutMappingQualityZeroReads();
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
* reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.
* @param minBaseQ
* @param minMapQ
* @return
*/
public ReadBackedExtendedEventPileup getBaseAndMappingFilteredPileup( int minBaseQ, int minMapQ );
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ.
* This method allocates and returns a new instance of ReadBackedPileup.
* @param minBaseQ
* @return
*/
public ReadBackedExtendedEventPileup getBaseFilteredPileup( int minBaseQ );
/** Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ.
* This method allocates and returns a new instance of ReadBackedPileup.
* @param minMapQ
* @return
*/
public ReadBackedExtendedEventPileup getMappingFilteredPileup( int minMapQ );
/**
@ -70,7 +109,9 @@ public interface ReadBackedExtendedEventPileup extends Iterable<ExtendedEventPil
* @param sampleName Name of the sample to use.
* @return A subset of this pileup containing only reads with the given sample.
*/
public ReadBackedExtendedEventPileup getPileupForSample(String sampleName);
public ReadBackedExtendedEventPileup getPileupForSample(String sampleName);
public Iterable<ExtendedEventPileupElement> toExtendedIterable();
/**
* Returns the number of deletion events in this pileup
@ -110,8 +151,6 @@ public interface ReadBackedExtendedEventPileup extends Iterable<ExtendedEventPil
*/
public GenomeLoc getLocation();
public String getShortPileupString();
/**
* Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
* @return
@ -147,6 +186,8 @@ public interface ReadBackedExtendedEventPileup extends Iterable<ExtendedEventPil
*/
public List<Pair<String,Integer>> getEventStringsWithCounts(byte[] refBases);
public String getShortPileupString();
/**
* Get an array of the mapping qualities
* @return

View File

@ -0,0 +1,214 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.*;
import net.sf.samtools.SAMRecord;
/**
 * Extended-event (indel) pileup implementation built on AbstractReadBackedPileup.
 * Tracks insertion/deletion events at a locus in addition to the base pileup machinery.
 */
public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<ReadBackedExtendedEventPileup, ExtendedEventPileupElement> implements ReadBackedExtendedEventPileup {
    private int nInsertions = 0;        // cached number of insertion events at the site
    private int maxDeletionLength = 0;  // cached value of the length of the longest deletion observed at the site

    public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, List<ExtendedEventPileupElement> pileupElements) {
        super(loc,pileupElements);
    }

    public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, PileupElementTracker<ExtendedEventPileupElement> tracker) {
        super(loc,tracker);
    }

    /**
     * Optimization of above constructor where all of the cached data is provided
     * @param loc    locus of the pileup
     * @param pileup elements making up the pileup
     */
    public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, List<ExtendedEventPileupElement> pileup, int size,
                                             int maxDeletionLength, int nInsertions, int nDeletions, int nMQ0Reads ) {
        super(loc,pileup,size,nDeletions,nMQ0Reads);
        this.maxDeletionLength = maxDeletionLength;
        this.nInsertions = nInsertions;
    }

    public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, Map<String,AbstractReadBackedPileup<?,ExtendedEventPileupElement>> pileupElementsBySample) {
        super(loc,new PerSamplePileupElementTracker<ExtendedEventPileupElement>(pileupElementsBySample));
    }

    /**
     * Calculate cached sizes, nDeletion, and base counts for the pileup.  This calculation is done upfront,
     * so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling
     * sizes, nDeletion, etc. over and over potentially.
     */
    @Override
    protected void calculateCachedData() {
        super.calculateCachedData();

        nDeletions = 0;
        nInsertions = 0;   // bug fix: insertions were never recounted here, so getNumberOfInsertions()
                           // returned 0 unless the fully-cached constructor was used
        maxDeletionLength = 0;

        for ( ExtendedEventPileupElement p : this.toExtendedIterable()) {
            switch ( p.getType() ) {
                case DELETION:
                    nDeletions++;
                    maxDeletionLength = Math.max(maxDeletionLength, p.getEventLength());
                    break;
                case INSERTION:
                    nInsertions++;
                    break;
                default:
                    break; // NOEVENT contributes to no cached indel statistic
            }
        }
    }

    @Override
    protected ReadBackedExtendedEventPileup createNewPileup(GenomeLoc loc, PileupElementTracker<ExtendedEventPileupElement> tracker) {
        return new ReadBackedExtendedEventPileupImpl(loc,tracker);
    }

    @Override
    protected ExtendedEventPileupElement createNewPileupElement(SAMRecord read, int offset) {
        throw new UnsupportedOperationException("Not enough information provided to create a new pileup element");
    }

    /**
     * Returns the number of insertion events in this pileup
     *
     * @return number of insertions
     */
    @Override
    public int getNumberOfInsertions() {
        return nInsertions;
    }

    /** Returns the length of the longest deletion observed at the site this
     * pileup is associated with (NOTE: by convention, both insertions and deletions
     * are associated with genomic location immediately before the actual event). If
     * there are no deletions at the site, returns 0.
     * @return longest deletion length, or 0
     */
    @Override
    public int getMaxDeletionLength() {
        return maxDeletionLength;
    }

    /** Exposes the elements under their concrete ExtendedEventPileupElement type. */
    public Iterable<ExtendedEventPileupElement> toExtendedIterable() {
        return new Iterable<ExtendedEventPileupElement>() {
            public Iterator<ExtendedEventPileupElement> iterator() { return pileupElementTracker.iterator(); }
        };
    }

    /**
     * Returns an array of the events in this pileup ('I', 'D', or '.'). Note this call costs O(n) and allocates fresh array each time
     * @return one event character per element, in pileup order
     */
    @Override
    public byte[] getEvents() {
        byte[] v = new byte[size()];
        int i = 0;
        for ( ExtendedEventPileupElement e : this.toExtendedIterable() ) {
            switch ( e.getType() ) {
                case INSERTION: v[i] = 'I'; break;
                case DELETION: v[i] = 'D'; break;
                case NOEVENT: v[i] = '.'; break;
                default: throw new StingException("Unknown event type encountered: "+e.getType());
            }
            i++;
        }
        return v;
    }

    /** A shortcut for getEventStringsWithCounts(null);
     *
     * @return event strings with counts, using the "<length>D" deletion representation
     */
    @Override
    public List<Pair<String,Integer>> getEventStringsWithCounts() {
        return getEventStringsWithCounts(null);
    }

    @Override
    public String getShortPileupString() {
        // In the pileup format, each extended event line has genomic position (chromosome name and offset),
        // reference "base" (always set to "E" for E(xtended)), then 'I','D', or '.' symbol for each read representing
        // insertion, deletion or no-event, respectively.
        return String.format("%s %s E %s",
                getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
                new String(getEvents()) );
    }

    /** Returns String representation of all distinct extended events (indels) at the site along with
     * observation counts (numbers of reads) for each distinct event. If refBases is null, a simple string representation for
     * deletions will be generated as "<length>D" (i.e. "5D"); if the reference bases are provided, the actual
     * deleted sequence will be used in the string representation (e.g. "-AAC").
     * @param refBases reference bases, starting with the current locus (i.e. the one immediately before the indel), and
     * extending far enough to accommodate the longest deletion (i.e. size of refBases must be at least 1+<length of longest deletion>)
     * @return list of distinct events; first element of a pair is a string representation of the event, second element
     * gives the number of reads, in which that event was observed
     */
    @Override
    public List<Pair<String,Integer>> getEventStringsWithCounts(byte[] refBases) {
        Map<String, Integer> events = new HashMap<String,Integer>();

        for ( ExtendedEventPileupElement e : this.toExtendedIterable() ) {
            Integer cnt;
            String indel = null;
            switch ( e.getType() ) {
                case INSERTION:
                    indel = "+"+e.getEventBases();
                    break;
                case DELETION:
                    indel = getDeletionString(e.getEventLength(),refBases);
                    break;
                case NOEVENT: continue;
                default: throw new StingException("Unknown event type encountered: "+e.getType());
            }

            cnt = events.get(indel);
            if ( cnt == null ) events.put(indel,1);
            else events.put(indel,cnt.intValue()+1);
        }

        List<Pair<String,Integer>> eventList = new ArrayList<Pair<String,Integer>>(events.size());
        for ( Map.Entry<String,Integer> m : events.entrySet() ) {
            eventList.add( new Pair<String,Integer>(m.getKey(),m.getValue()));
        }
        return eventList;
    }

    /**
     * Builds string representation of the deletion event. If refBases is null, the representation will be
     * "<length>D" (e.g. "5D"); if the reference bases are provided, a verbose representation (e.g. "-AAC")
     * will be generated. NOTE: refBases must start with the base prior to the actual deletion (i.e. deleted
     * base(s) are refBase[1], refBase[2], ...), and the length of the passed array must be sufficient to accommodate the
     * deletion length (i.e. size of refBase must be at least length+1).
     * @param length   number of deleted bases
     * @param refBases reference context, or null for the terse representation
     * @return string representation of the deletion
     */
    private String getDeletionString(int length, byte[] refBases) {
        if ( refBases == null ) {
            return Integer.toString(length)+"D"; // if we do not have reference bases, we can only report something like "5D"
        } else {
            return "-"+new String(refBases,1,length).toUpperCase();
        }
    }
}

View File

@ -145,7 +145,7 @@ public interface ReadBackedPileup extends Iterable<PileupElement> {
*/
public boolean hasSecondaryBases();
public String getPileupString(char ref);
public String getPileupString(Character ref);
/**
* Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time

View File

@ -0,0 +1,76 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.utils.GenomeLoc;
import net.sf.samtools.SAMRecord;
import java.util.List;
import java.util.Map;
/**
 * Default concrete implementation of ReadBackedPileup.  All pileup behavior lives in
 * AbstractReadBackedPileup; this class supplies the constructors and the two factory hooks
 * (createNewPileup / createNewPileupElement) bound to the plain PileupElement type.
 */
public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPileup,PileupElement> implements ReadBackedPileup {
    /** Creates an empty pileup at the given locus. */
    public ReadBackedPileupImpl(GenomeLoc loc) {
        super(loc);
    }

    /** Creates a pileup from parallel lists of reads and per-read offsets. */
    public ReadBackedPileupImpl(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets ) {
        super(loc,reads,offsets);
    }

    /** Creates a pileup where every read shares the same offset. */
    public ReadBackedPileupImpl(GenomeLoc loc, List<SAMRecord> reads, int offset ) {
        super(loc,reads,offset);
    }

    /** Creates a pileup from pre-built pileup elements. */
    public ReadBackedPileupImpl(GenomeLoc loc, List<PileupElement> pileupElements) {
        super(loc,pileupElements);
    }

    /** Creates a sample-split pileup from per-sample sub-pileups. */
    public ReadBackedPileupImpl(GenomeLoc loc, Map<String,AbstractReadBackedPileup<?,PileupElement>> pileupElementsBySample) {
        super(loc,new PerSamplePileupElementTracker<PileupElement>(pileupElementsBySample));
    }

    /**
     * Optimization of above constructor where all of the cached data is provided
     * @param loc    locus of the pileup
     * @param pileup elements making up the pileup
     */
    public ReadBackedPileupImpl(GenomeLoc loc, List<PileupElement> pileup, int size, int nDeletions, int nMQ0Reads ) {
        super(loc,pileup,size,nDeletions,nMQ0Reads);
    }

    /** Internal constructor used by createNewPileup to wrap an existing tracker. */
    protected ReadBackedPileupImpl(GenomeLoc loc, PileupElementTracker<PileupElement> tracker) {
        super(loc,tracker);
    }

    @Override
    protected ReadBackedPileup createNewPileup(GenomeLoc loc, PileupElementTracker<PileupElement> tracker) {
        return new ReadBackedPileupImpl(loc,tracker);
    }

    @Override
    protected PileupElement createNewPileupElement(SAMRecord read, int offset) {
        return new PileupElement(read,offset);
    }
}

View File

@ -1,366 +0,0 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.*;
import net.sf.samtools.SAMRecord;
/**
* Extended event pileups, split out by sample.
*
* @author mhanna
* @version 0.1
*/
public class SampleSplitReadBackedExtendedEventPileup implements ReadBackedExtendedEventPileup {
    private GenomeLoc loc = null;
    private Map<String,ReadBackedExtendedEventPileup> pileupBySample = null;
    private int size = 0;         // cached sum of per-sample pileup sizes
    private int nDeletions = 0;   // cached number of deletion events
    private int nInsertions = 0;  // cached number of insertion events
    private int nMQ0Reads = 0;    // cached number of MQ0 reads

    /**
     * Creates an empty, mutable pileup at the given location; per-sample
     * pileups are added incrementally via addPileup().
     * @param loc Location of this pileup.
     */
    public SampleSplitReadBackedExtendedEventPileup(GenomeLoc loc) {
        this(loc,new HashMap<String,ReadBackedExtendedEventPileup>());
    }

    /**
     * Optimization of above constructor where all of the cached data is provided
     * @param loc Location of this pileup.
     * @param pileup Pileup data, split by sample name.
     */
    public SampleSplitReadBackedExtendedEventPileup(GenomeLoc loc, Map<String,ReadBackedExtendedEventPileup> pileup) {
        if ( loc == null ) throw new StingException("Illegal null genomeloc in SampleSplitReadBackedExtendedEventPileup");
        if ( pileup == null ) throw new StingException("Illegal null pileup in SampleSplitReadBackedExtendedEventPileup");
        this.loc = loc;
        this.pileupBySample = pileup;
        for(ReadBackedExtendedEventPileup samplePileup: pileup.values()) {
            this.size += samplePileup.size();
            this.nDeletions += samplePileup.getNumberOfDeletions();
            // Bug fix: insertion counts were never aggregated, so
            // getNumberOfInsertions() always returned 0 for split pileups.
            this.nInsertions += samplePileup.getNumberOfInsertions();
            this.nMQ0Reads += samplePileup.getNumberOfMappingQualityZeroReads();
        }
    }

    /** Registers a single sample's pileup and folds its counts into the cached totals. */
    private void addPileup(final String sampleName, ReadBackedExtendedEventPileup pileup) {
        pileupBySample.put(sampleName,pileup);
        size += pileup.size();
        nDeletions += pileup.getNumberOfDeletions();
        nInsertions += pileup.getNumberOfInsertions();  // bug fix: keep insertion cache in sync
        nMQ0Reads += pileup.getNumberOfMappingQualityZeroReads();
    }

    /**
     * Returns a pileup with all mapping-quality-zero reads removed, filtering each
     * sample independently. Returns this pileup unchanged if no MQ0 reads exist.
     */
    @Override
    public ReadBackedExtendedEventPileup getPileupWithoutMappingQualityZeroReads() {
        if ( getNumberOfMappingQualityZeroReads() > 0 ) {
            SampleSplitReadBackedExtendedEventPileup filteredPileup = new SampleSplitReadBackedExtendedEventPileup(loc);
            for (Map.Entry<String,ReadBackedExtendedEventPileup> entry: pileupBySample.entrySet()) {
                String sampleName = entry.getKey();
                ReadBackedExtendedEventPileup pileup = entry.getValue();
                filteredPileup.addPileup(sampleName,pileup.getPileupWithoutMappingQualityZeroReads());
            }
            return filteredPileup;
        } else {
            return this;
        }
    }

    /** Returns a pileup restricted to reads with mapping quality >= minMapQ, per sample. */
    @Override
    public ReadBackedExtendedEventPileup getMappingFilteredPileup( int minMapQ ) {
        SampleSplitReadBackedExtendedEventPileup filteredPileup = new SampleSplitReadBackedExtendedEventPileup(loc);
        for (Map.Entry<String,ReadBackedExtendedEventPileup> entry: pileupBySample.entrySet()) {
            String sampleName = entry.getKey();
            ReadBackedExtendedEventPileup pileup = entry.getValue();
            filteredPileup.addPileup(sampleName,pileup.getMappingFilteredPileup(minMapQ));
        }
        return filteredPileup;
    }

    /**
     * Returns a pileup randomly downsampled to the desiredCoverage.
     *
     * @param desiredCoverage target number of elements to retain
     * @return this pileup if already at or below desiredCoverage, else a downsampled copy
     */
    @Override
    public ReadBackedExtendedEventPileup getDownsampledPileup(int desiredCoverage) {
        if ( size() <= desiredCoverage )
            return this;

        // randomly choose desiredCoverage distinct element positions in [0, size)
        Random generator = new Random();
        TreeSet<Integer> positions = new TreeSet<Integer>();
        for ( int i = 0; i < desiredCoverage; /* no update */ ) {
            if ( positions.add(generator.nextInt(size)) )
                i++;
        }

        SampleSplitReadBackedExtendedEventPileup downsampledPileup = new SampleSplitReadBackedExtendedEventPileup(getLocation());
        int current = 0;
        for(Map.Entry<String,ReadBackedExtendedEventPileup> entry: this.pileupBySample.entrySet()) {
            String sampleName = entry.getKey();
            ReadBackedExtendedEventPileup pileup = entry.getValue();
            List<ExtendedEventPileupElement> filteredPileup = new ArrayList<ExtendedEventPileupElement>();
            for(ExtendedEventPileupElement p: pileup) {
                if(positions.contains(current))
                    filteredPileup.add(p);
                // Bug fix: the global element index must advance once per element.
                // It previously advanced once per SAMPLE, so entire samples were
                // kept or dropped based on their ordinal position in the map.
                current++;
            }
            if(!filteredPileup.isEmpty())
                downsampledPileup.addPileup(sampleName,new UnifiedReadBackedExtendedEventPileup(loc,filteredPileup));
        }
        return downsampledPileup;
    }

    /**
     * Gets a list of all the samples stored in this pileup.
     * @return List of samples in this pileup.
     */
    @Override
    public Collection<String> getSamples() {
        return pileupBySample.keySet();
    }

    /**
     * Gets the particular subset of this pileup with the given sample name.
     * @param sampleName Name of the sample to use.
     * @return A subset of this pileup containing only reads with the given sample.
     */
    @Override
    public ReadBackedExtendedEventPileup getPileupForSample(String sampleName) {
        return pileupBySample.get(sampleName);
    }

    /** Iterates over all samples' elements merged in read order, cast to extended-event elements. */
    @Override
    public Iterator<ExtendedEventPileupElement> iterator() {
        return new ExtendedEventCastingIterator(new MergingPileupElementIterator(pileupBySample));
    }

    /**
     * Returns the number of deletion events in this pileup
     * @return cached deletion count
     */
    @Override
    public int getNumberOfDeletions() {
        return nDeletions;
    }

    /**
     * Returns the number of insertion events in this pileup
     * @return cached insertion count
     */
    @Override
    public int getNumberOfInsertions() {
        return nInsertions;
    }

    /** Returns the length of the longest deletion observed at the site this
     * pileup is associated with (NOTE: by convention, both insertions and deletions
     * are associated with genomic location immediately before the actual event). If
     * there are no deletions at the site, returns 0.
     * @return max deletion length over all samples (computed on demand)
     */
    @Override
    public int getMaxDeletionLength() {
        int maxDeletionLength = 0;
        for(ReadBackedExtendedEventPileup pileup: pileupBySample.values())
            maxDeletionLength = Math.max(maxDeletionLength,pileup.getMaxDeletionLength());
        return maxDeletionLength;
    }

    /**
     * Returns the number of mapping quality zero reads in this pileup.
     * @return cached MQ0 read count
     */
    @Override
    public int getNumberOfMappingQualityZeroReads() {
        return nMQ0Reads;
    }

    /**
     * @return the number of elements in this pileup
     */
    @Override
    public int size() {
        return size;
    }

    /**
     * @return the location of this pileup
     */
    @Override
    public GenomeLoc getLocation() {
        return loc;
    }

    @Override
    public String getShortPileupString() {
        // In the pileup format, each extended event line has genomic position (chromosome name and offset),
        // reference "base" (always set to "E" for E(xtended)), then 'I','D', or '.' symbol for each read representing
        // insertion, deletion or no-event, respectively.
        return String.format("%s %s E %s",
                getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
                new String(getEvents()) );
    }

    // --------------------------------------------------------
    //
    // Convenience functions that may be slow
    //
    // --------------------------------------------------------

    /**
     * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
     * @return fresh list of the backing reads
     */
    @Override
    public List<SAMRecord> getReads() {
        List<SAMRecord> reads = new ArrayList<SAMRecord>(size());
        for ( ExtendedEventPileupElement pile : this ) { reads.add(pile.getRead()); }
        return reads;
    }

    /**
     * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
     * @return fresh list of per-read offsets
     */
    @Override
    public List<Integer> getOffsets() {
        List<Integer> offsets = new ArrayList<Integer>(size());
        for ( ExtendedEventPileupElement pile : this ) { offsets.add(pile.getOffset()); }
        return offsets;
    }

    /**
     * Returns an array of the events in this pileup ('I', 'D', or '.'). Note this call costs O(n) and allocates fresh array each time
     * @return one event character per element
     */
    @Override
    public byte[] getEvents() {
        byte[] v = new byte[size()];
        int i = 0;
        for ( ExtendedEventPileupElement e : this ) {
            switch ( e.getType() ) {
                case INSERTION: v[i] = 'I'; break;
                case DELETION: v[i] = 'D'; break;
                case NOEVENT: v[i] = '.'; break;
                default: throw new StingException("Unknown event type encountered: "+e.getType());
            }
            i++;
        }
        return v;
    }

    /** A shortcut for getEventStringsWithCounts(null);
     * @return distinct event strings with observation counts
     */
    @Override
    public List<Pair<String,Integer>> getEventStringsWithCounts() {
        return getEventStringsWithCounts(null);
    }

    /** Returns String representation of all distinct extended events (indels) at the site along with
     * observation counts (numbers of reads) for each distinct event. If refBases is null, a simple string representation for
     * deletions will be generated as "&lt;length&gt;D" (i.e. "5D"); if the reference bases are provided, the actual
     * deleted sequence will be used in the string representation (e.g. "-AAC").
     * @param refBases reference bases, starting with the current locus (i.e. the one immediately before the indel), and
     * extending far enough to accommodate the longest deletion (i.e. size of refBases must be at least 1+&lt;length of longest deletion&gt;)
     * @return list of distinct events; first element of a pair is a string representation of the event, second element
     * gives the number of reads, in which that event was observed
     */
    @Override
    public List<Pair<String,Integer>> getEventStringsWithCounts(byte[] refBases) {
        Map<String, Integer> events = new HashMap<String,Integer>();
        for ( ExtendedEventPileupElement e : this ) {
            Integer cnt;
            String indel = null;
            switch ( e.getType() ) {
                case INSERTION:
                    indel = "+"+e.getEventBases();
                    break;
                case DELETION:
                    indel = UnifiedReadBackedExtendedEventPileup.getDeletionString(e.getEventLength(),refBases);
                    break;
                case NOEVENT: continue;  // no-events carry no indel string
                default: throw new StingException("Unknown event type encountered: "+e.getType());
            }
            cnt = events.get(indel);
            if ( cnt == null ) events.put(indel,1);
            else events.put(indel,cnt.intValue()+1);
        }
        List<Pair<String,Integer>> eventList = new ArrayList<Pair<String,Integer>>(events.size());
        for ( Map.Entry<String,Integer> m : events.entrySet() ) {
            eventList.add( new Pair<String,Integer>(m.getKey(),m.getValue()));
        }
        return eventList;
    }

    /**
     * Get an array of the mapping qualities
     * @return one mapping quality per element, in iteration order
     */
    @Override
    public byte[] getMappingQuals() {
        byte[] v = new byte[size()];
        int i = 0;
        for ( ExtendedEventPileupElement e : this ) { v[i++] = (byte)e.getRead().getMappingQuality(); }
        return v;
    }

    /** Adapts the merged PileupElement iterator to the extended-event element type via a cast. */
    private class ExtendedEventCastingIterator implements Iterator<ExtendedEventPileupElement> {
        private final Iterator<PileupElement> wrappedIterator;

        public ExtendedEventCastingIterator(Iterator<PileupElement> iterator) {
            this.wrappedIterator = iterator;
        }

        public boolean hasNext() { return wrappedIterator.hasNext(); }
        public ExtendedEventPileupElement next() { return (ExtendedEventPileupElement)wrappedIterator.next(); }
        public void remove() { wrappedIterator.remove(); }
    }
}

View File

@ -1,418 +0,0 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.pileup;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.SAMRecord;
import java.util.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.gatk.iterators.IterableIterator;
/**
* A read-backed pileup that internally divides the dataset based
* on sample, for super efficient per-sample access.
*
* TODO: there are a few functions that are shared between UnifiedReadBackedPileup and SampleSplitReadBackedPileup.
* TODO: refactor these into a common class.
*
* @author mhanna
* @version 0.1
*/
public class SampleSplitReadBackedPileup implements ReadBackedPileup {
private GenomeLoc loc = null;
private Map<String,ReadBackedPileup> pileupBySample = null;
private int size = 0; // cached value of the size of the pileup
private int nDeletions = 0; // cached value of the number of deletions
private int nMQ0Reads = 0; // cached value of the number of MQ0 reads
public SampleSplitReadBackedPileup(GenomeLoc loc) {
this(loc,new HashMap<String,ReadBackedPileup>());
}
/**
* Optimization of above constructor where all of the cached data is provided
* @param loc Location of this pileup.
* @param pileup Pileup data, split by sample name.
*/
public SampleSplitReadBackedPileup(GenomeLoc loc, Map<String,ReadBackedPileup> pileup) {
if ( loc == null ) throw new StingException("Illegal null genomeloc in UnifiedReadBackedPileup");
if ( pileup == null ) throw new StingException("Illegal null pileup in UnifiedReadBackedPileup");
this.loc = loc;
this.pileupBySample = pileup;
for(ReadBackedPileup pileupBySample: pileup.values()) {
this.size += pileupBySample.size();
this.nDeletions += pileupBySample.getNumberOfDeletions();
this.nMQ0Reads += pileupBySample.getNumberOfMappingQualityZeroReads();
}
}
private void addPileup(final String sampleName, ReadBackedPileup pileup) {
pileupBySample.put(sampleName,pileup);
size += pileup.size();
nDeletions += pileup.getNumberOfDeletions();
nMQ0Reads += pileup.getNumberOfMappingQualityZeroReads();
}
/**
* Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
* of the pileup (just returns this) if there are no deletions in the pileup.
*
* @return
*/
@Override
public ReadBackedPileup getPileupWithoutDeletions() {
if ( getNumberOfDeletions() > 0 ) {
SampleSplitReadBackedPileup filteredPileup = new SampleSplitReadBackedPileup(loc);
for (Map.Entry<String,ReadBackedPileup> entry: pileupBySample.entrySet()) {
String sampleName = entry.getKey();
ReadBackedPileup pileup = entry.getValue();
filteredPileup.addPileup(sampleName,pileup.getPileupWithoutDeletions());
}
return filteredPileup;
} else {
return this;
}
}
/**
* Returns a new ReadBackedPileup where only one read from an overlapping read
* pair is retained. If the two reads in question disagree to their basecall,
* neither read is retained. If they agree on the base, the read with the higher
* quality observation is retained
*
* @return the newly filtered pileup
*/
@Override
public ReadBackedPileup getOverlappingFragmentFilteredPileup() {
SampleSplitReadBackedPileup filteredPileup = new SampleSplitReadBackedPileup(loc);
for (Map.Entry<String,ReadBackedPileup> entry: pileupBySample.entrySet()) {
String sampleName = entry.getKey();
ReadBackedPileup pileup = entry.getValue();
filteredPileup.addPileup(sampleName,pileup.getOverlappingFragmentFilteredPileup());
}
return filteredPileup;
}
/**
* Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
* of the pileup (just returns this) if there are no MQ0 reads in the pileup.
*
* @return
*/
public ReadBackedPileup getPileupWithoutMappingQualityZeroReads() {
SampleSplitReadBackedPileup filteredPileup = new SampleSplitReadBackedPileup(loc);
for (Map.Entry<String,ReadBackedPileup> entry: pileupBySample.entrySet()) {
String sampleName = entry.getKey();
ReadBackedPileup pileup = entry.getValue();
filteredPileup.addPileup(sampleName,pileup.getPileupWithoutMappingQualityZeroReads());
}
return filteredPileup;
}
/**
* Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
* reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.
* @param minBaseQ
* @param minMapQ
* @return
*/
@Override
public ReadBackedPileup getBaseAndMappingFilteredPileup( int minBaseQ, int minMapQ ) {
SampleSplitReadBackedPileup filteredPileup = new SampleSplitReadBackedPileup(loc);
for (Map.Entry<String,ReadBackedPileup> entry: pileupBySample.entrySet()) {
String sampleName = entry.getKey();
ReadBackedPileup pileup = entry.getValue();
filteredPileup.addPileup(sampleName,pileup.getBaseAndMappingFilteredPileup(minBaseQ,minMapQ));
}
return filteredPileup;
}
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ.
* This method allocates and returns a new instance of ReadBackedPileup.
* @param minBaseQ
* @return
*/
@Override
public ReadBackedPileup getBaseFilteredPileup( int minBaseQ ) {
return getBaseAndMappingFilteredPileup(minBaseQ, -1);
}
/** Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ.
* This method allocates and returns a new instance of ReadBackedPileup.
* @param minMapQ
* @return
*/
@Override
public ReadBackedPileup getMappingFilteredPileup( int minMapQ ) {
return getBaseAndMappingFilteredPileup(-1, minMapQ);
}
/**
* Returns a pileup randomly downsampled to the desiredCoverage.
*
* @param desiredCoverage
* @return
*/
@Override
public ReadBackedPileup getDownsampledPileup(int desiredCoverage) {
if ( size() <= desiredCoverage )
return this;
// randomly choose numbers corresponding to positions in the reads list
Random generator = new Random();
TreeSet<Integer> positions = new TreeSet<Integer>();
for ( int i = 0; i < desiredCoverage; /* no update */ ) {
if ( positions.add(generator.nextInt(size)) )
i++;
}
SampleSplitReadBackedPileup downsampledPileup = new SampleSplitReadBackedPileup(getLocation());
int current = 0;
for(Map.Entry<String,ReadBackedPileup> entry: this.pileupBySample.entrySet()) {
String sampleName = entry.getKey();
ReadBackedPileup pileup = entry.getValue();
List<PileupElement> filteredPileup = new ArrayList<PileupElement>();
for(PileupElement p: pileup) {
if(positions.contains(current))
filteredPileup.add(p);
}
if(!filteredPileup.isEmpty())
downsampledPileup.addPileup(sampleName,new UnifiedReadBackedPileup(loc,filteredPileup));
current++;
}
return downsampledPileup;
}
public Collection<String> getSamples() {
return pileupBySample.keySet();
}
@Override
public ReadBackedPileup getPileupForSample(String sampleName) {
return pileupBySample.containsKey(sampleName) ? pileupBySample.get(sampleName) : null;
}
// --------------------------------------------------------
//
// iterators
//
// --------------------------------------------------------
/**
* The best way to access PileupElements where you only care about the bases and quals in the pileup.
*
* for (PileupElement p : this) { doSomething(p); }
*
* Provides efficient iteration of the data.
*
* @return
*/
@Override
public Iterator<PileupElement> iterator() {
return new MergingPileupElementIterator(pileupBySample);
}
// todo -- why is this public?
@Override
public IterableIterator<ExtendedPileupElement> extendedForeachIterator() {
ArrayList<ExtendedPileupElement> x = new ArrayList<ExtendedPileupElement>(size());
int i = 0;
Iterator<PileupElement> iterator = new MergingPileupElementIterator(pileupBySample);
while(iterator.hasNext()) {
PileupElement pile = iterator.next();
x.add(new ExtendedPileupElement(pile.getRead(), pile.getOffset(), i++, this));
}
return new IterableIterator<ExtendedPileupElement>(x.iterator());
}
/**
* Simple useful routine to count the number of deletion bases in this pileup
*
* @return
*/
@Override
public int getNumberOfDeletions() {
return nDeletions;
}
@Override
public int getNumberOfMappingQualityZeroReads() {
return nMQ0Reads;
}
/**
* @return the number of elements in this pileup
*/
@Override
public int size() {
return size;
}
/**
* @return the location of this pileup
*/
@Override
public GenomeLoc getLocation() {
return loc;
}
/**
* Get counts of A, C, G, T in order, which returns a int[4] vector with counts according
* to BaseUtils.simpleBaseToBaseIndex for each base.
*
* @return
*/
@Override
public int[] getBaseCounts() {
int[] counts = new int[4];
for(ReadBackedPileup pileup: pileupBySample.values()) {
int[] countsBySample = pileup.getBaseCounts();
for(int i = 0; i < counts.length; i++)
counts[i] += countsBySample[i];
}
return counts;
}
/**
* Somewhat expensive routine that returns true if any base in the pileup has secondary bases annotated
* @return
*/
@Override
public boolean hasSecondaryBases() {
boolean hasSecondaryBases = false;
for(ReadBackedPileup pileup: pileupBySample.values())
hasSecondaryBases |= pileup.hasSecondaryBases();
return hasSecondaryBases;
}
@Override
public String getPileupString(char ref) {
// In the pileup format, each line represents a genomic position, consisting of chromosome name,
// coordinate, reference base, read bases, read qualities and alignment mapping qualities.
return String.format("%s %s %c %s %s",
getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
ref, // reference base
new String(getBases()),
getQualsString());
}
// --------------------------------------------------------
//
// Convenience functions that may be slow
//
// --------------------------------------------------------
/**
* Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
* @return
*/
@Override
public List<SAMRecord> getReads() {
List<SAMRecord> reads = new ArrayList<SAMRecord>(size());
for ( PileupElement pile : this ) { reads.add(pile.getRead()); }
return reads;
}
/**
* Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
* @return
*/
@Override
public List<Integer> getOffsets() {
List<Integer> offsets = new ArrayList<Integer>(size());
for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); }
return offsets;
}
/**
* Returns an array of the bases in this pileup. Note this call costs O(n) and allocates fresh array each time
* @return
*/
@Override
public byte[] getBases() {
byte[] v = new byte[size()];
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getBase(); }
return v;
}
/**
* Returns an array of the secondary bases in this pileup. Note this call costs O(n) and allocates fresh array each time
* @return
*/
@Override
public byte[] getSecondaryBases() {
byte[] v = new byte[size()];
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getSecondBase(); }
return v;
}
/**
* Returns an array of the quals in this pileup. Note this call costs O(n) and allocates fresh array each time
* @return
*/
@Override
public byte[] getQuals() {
byte[] v = new byte[size()];
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getQual(); }
return v;
}
/**
* Get an array of the mapping qualities
* @return
*/
@Override
public byte[] getMappingQuals() {
byte[] v = new byte[size()];
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = (byte)pile.getRead().getMappingQuality(); }
return v;
}
private String getQualsString() {
return UnifiedReadBackedPileup.quals2String(getQuals());
}
}

View File

@ -1,448 +0,0 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.*;
import net.sf.samtools.SAMRecord;
/**
 * A read-backed pileup of extended (indel) events backed by a single flat list
 * of {@link ExtendedEventPileupElement}s, with no per-sample subdivision.
 *
 * @author asivache
 * @since Dec 29, 2009
 */
public class UnifiedReadBackedExtendedEventPileup implements ReadBackedExtendedEventPileup {
private GenomeLoc loc = null;
private List<ExtendedEventPileupElement> pileup = null;
private int size = 0; // cached value of the size of the pileup
private int maxDeletionLength = 0; // cached value of the length of the longest deletion observed at the site
private int nDeletions = 0; // cached value of the number of deletions
private int nInsertions = 0;
private int nMQ0Reads = 0; // cached value of the number of MQ0 reads
/**
* Create a new version of a read backed pileup at loc without any aligned reads
*
*/
public UnifiedReadBackedExtendedEventPileup(GenomeLoc loc ) {
this(loc, new ArrayList<ExtendedEventPileupElement>(0));
}
/**
 * Create a new version of a read backed pileup at loc, using the reads and their corresponding
 * offsets. This lower-level constructor assumes pileup is well-formed and merely keeps a
 * pointer to pileup. Don't go changing the data in pileup.
 *
 */
public UnifiedReadBackedExtendedEventPileup(GenomeLoc loc, List<ExtendedEventPileupElement> pileup ) {
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedExtendedEventPileup");
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedExtendedEventPileup");
this.loc = loc;
this.pileup = pileup;
calculatedCachedData();
}
/**
* Optimization of above constructor where all of the cached data is provided
* @param loc
* @param pileup
*/
public UnifiedReadBackedExtendedEventPileup(GenomeLoc loc, List<ExtendedEventPileupElement> pileup, int size,
int maxDeletionLength, int nInsertions, int nDeletions, int nMQ0Reads ) {
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedExtendedEventPileup");
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedExtendedEventPileup");
this.loc = loc;
this.pileup = pileup;
this.size = size;
this.maxDeletionLength = maxDeletionLength;
this.nDeletions = nDeletions;
this.nInsertions = nInsertions;
this.nMQ0Reads = nMQ0Reads;
}
/**
* Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront,
* so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling
* sizes, nDeletion, etc. over and over potentially.
*/
private void calculatedCachedData() {
size = 0;
nDeletions = 0;
nInsertions = 0;
nMQ0Reads = 0;
for ( ExtendedEventPileupElement p : this ) {
size++;
if ( p.isDeletion() ) {
nDeletions++;
maxDeletionLength = Math.max(maxDeletionLength, p.getEventLength());
} else {
if ( p.isInsertion() ) nInsertions++;
}
if ( p.getRead().getMappingQuality() == 0 ) {
nMQ0Reads++;
}
}
}
// --------------------------------------------------------
//
// Special 'constructors'
//
// --------------------------------------------------------
/**
* Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
* of the pileup (just returns this) if there are no MQ0 reads in the pileup.
*
* @return
*/
@Override
public ReadBackedExtendedEventPileup getPileupWithoutMappingQualityZeroReads() {
if ( getNumberOfMappingQualityZeroReads() > 0 ) {
ArrayList<ExtendedEventPileupElement> filteredPileup = new ArrayList<ExtendedEventPileupElement>();
for ( ExtendedEventPileupElement p : pileup ) {
if ( p.getRead().getMappingQuality() > 0 ) {
filteredPileup.add(p);
}
}
return new UnifiedReadBackedExtendedEventPileup(loc, filteredPileup);
} else {
return this;
}
}
@Override
public ReadBackedExtendedEventPileup getMappingFilteredPileup( int minMapQ ) {
ArrayList<ExtendedEventPileupElement> filteredPileup = new ArrayList<ExtendedEventPileupElement>();
for ( ExtendedEventPileupElement p : pileup ) {
if ( p.getRead().getMappingQuality() >= minMapQ ) {
filteredPileup.add(p);
}
}
return new UnifiedReadBackedExtendedEventPileup(loc, filteredPileup);
}
/**
* Returns a pileup randomly downsampled to the desiredCoverage.
*
* @param desiredCoverage
* @return
*/
@Override
public ReadBackedExtendedEventPileup getDownsampledPileup(int desiredCoverage) {
if ( size() <= desiredCoverage )
return this;
// randomly choose numbers corresponding to positions in the reads list
Random generator = new Random();
TreeSet<Integer> positions = new TreeSet<Integer>();
for ( int i = 0; i < desiredCoverage; /* no update */ ) {
if ( positions.add(generator.nextInt(pileup.size())) )
i++;
}
Iterator positionIter = positions.iterator();
ArrayList<ExtendedEventPileupElement> downsampledPileup = new ArrayList<ExtendedEventPileupElement>();
while ( positionIter.hasNext() ) {
int nextReadToKeep = (Integer)positionIter.next();
downsampledPileup.add(pileup.get(nextReadToKeep));
}
return new UnifiedReadBackedExtendedEventPileup(getLocation(), downsampledPileup);
}
@Override
public Collection<String> getSamples() {
Collection<String> sampleNames = new HashSet<String>();
for(PileupElement p: this) {
SAMRecord read = p.getRead();
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
sampleNames.add(sampleName);
}
return sampleNames;
}
@Override
public ReadBackedExtendedEventPileup getPileupForSample(String sampleName) {
List<ExtendedEventPileupElement> filteredPileup = new ArrayList<ExtendedEventPileupElement>();
for(ExtendedEventPileupElement p: this) {
SAMRecord read = p.getRead();
if(sampleName != null) {
if(read.getReadGroup() != null && sampleName.equals(read.getReadGroup().getSample()))
filteredPileup.add(p);
}
else {
if(read.getReadGroup() == null || read.getReadGroup().getSample() == null)
filteredPileup.add(p);
}
}
return filteredPileup.size()>0 ? new UnifiedReadBackedExtendedEventPileup(loc,filteredPileup) : null;
}
// --------------------------------------------------------
//
// iterators
//
// --------------------------------------------------------
/**
 * The best way to access PileupElements where you only care about the bases and quals in the pileup.
 *
 * for (PileupElement p : this) { doSomething(p); }
 *
 * Provides efficient iteration of the data.
 *
 * @return an iterator over the underlying list of extended-event pileup elements
 */
@Override
public Iterator<ExtendedEventPileupElement> iterator() {
    // Delegates directly to the backing list; callers must not remove elements.
    return pileup.iterator();
}
/**
 * Returns the number of deletion events in this pileup
 *
 * @return the cached count of deletion events (computed when the pileup was built)
 */
@Override
public int getNumberOfDeletions() {
    return nDeletions;
}
/**
 * Returns the number of insertion events in this pileup
 *
 * @return the cached count of insertion events (computed when the pileup was built)
 */
@Override
public int getNumberOfInsertions() {
    return nInsertions;
}
/** Returns the length of the longest deletion observed at the site this
 * pileup is associated with (NOTE: by convention, both insertions and deletions
 * are associated with genomic location immediately before the actual event). If
 * there are no deletions at the site, returns 0.
 * @return cached maximum deletion length, or 0 when no deletions are present
 */
@Override
public int getMaxDeletionLength() {
    return maxDeletionLength;
}
/**
 * Returns the number of mapping quality zero reads in this pileup.
 * @return the cached count of MQ0 reads (computed when the pileup was built)
 */
@Override
public int getNumberOfMappingQualityZeroReads() {
    return nMQ0Reads;
}
// public int getNumberOfDeletions() {
// int n = 0;
//
// for ( int i = 0; i < size(); i++ ) {
// if ( getOffsets().get(i) != -1 ) { n++; }
// }
//
// return n;
// }
/**
 * @return the number of elements in this pileup (cached at construction time)
 */
@Override
public int size() {
    return size;
}
/**
 * @return the location of this pileup; extended events are associated with the
 *         genomic position immediately preceding the indel itself
 */
@Override
public GenomeLoc getLocation() {
    return loc;
}
/**
 * Formats this pileup as a single pileup-format line for an extended-event locus:
 * chromosome name, coordinate, the literal reference "base" 'E' (for E(xtended)),
 * then one 'I', 'D', or '.' character per read for insertion, deletion, or
 * no event, respectively.
 *
 * @return the formatted pileup line
 */
@Override
public String getShortPileupString() {
    GenomeLoc position = getLocation();
    return String.format("%s %s E %s", position.getContig(), position.getStart(), new String(getEvents()));
}
// --------------------------------------------------------
//
// Convenience functions that may be slow
//
// --------------------------------------------------------
/**
 * Returns the reads backing this pileup, in pileup order.
 * Note this call costs O(n) and allocates a fresh list each time.
 *
 * @return a newly allocated list of the reads in this pileup
 */
@Override
public List<SAMRecord> getReads() {
    List<SAMRecord> result = new ArrayList<SAMRecord>(size());
    for ( ExtendedEventPileupElement element : this )
        result.add(element.getRead());
    return result;
}
/**
 * Returns the per-read offsets of this pileup, in pileup order.
 * Note this call costs O(n) and allocates a fresh list each time.
 *
 * @return a newly allocated list of the offsets in this pileup
 */
@Override
public List<Integer> getOffsets() {
    List<Integer> result = new ArrayList<Integer>(size());
    for ( ExtendedEventPileupElement element : this )
        result.add(element.getOffset());
    return result;
}
/**
 * Returns one event character per read in this pileup: 'I' for an insertion,
 * 'D' for a deletion, '.' for no event.
 * Note this call costs O(n) and allocates a fresh array each time.
 *
 * @return array of event characters, one per pileup element, in pileup order
 * @throws StingException if an element reports an unrecognized event type
 */
@Override
public byte[] getEvents() {
    byte[] events = new byte[size()];
    int next = 0;
    for ( ExtendedEventPileupElement element : this ) {
        byte code;
        switch ( element.getType() ) {
            case INSERTION: code = 'I'; break;
            case DELETION:  code = 'D'; break;
            case NOEVENT:   code = '.'; break;
            default: throw new StingException("Unknown event type encountered: "+element.getType());
        }
        events[next++] = code;
    }
    return events;
}
/** A shortcut for getEventStringsWithCounts(null);
 * deletions will therefore be rendered in the short "&lt;length&gt;D" form.
 * @return list of distinct event strings paired with their observation counts
 */
@Override
public List<Pair<String,Integer>> getEventStringsWithCounts() {
    return getEventStringsWithCounts(null);
}
/** Returns String representation of all distinct extended events (indels) at the site along with
 * observation counts (numbers of reads) for each distinct event. If refBases is null, a simple string representation for
 * deletions will be generated as "<length>D" (i.e. "5D"); if the reference bases are provided, the actual
 * deleted sequence will be used in the string representation (e.g. "-AAC").
 * @param refBases reference bases, starting with the current locus (i.e. the one immediately before the indel), and
 * extending far enough to accomodate the longest deletion (i.e. size of refBases must be at least 1+<length of longest deletion>)
 * @return list of distinct events; first element of a pair is a string representation of the event, second element
 * gives the number of reads, in which that event was observed
 */
@Override
public List<Pair<String,Integer>> getEventStringsWithCounts(byte[] refBases) {
    Map<String, Integer> counts = new HashMap<String,Integer>();
    for ( ExtendedEventPileupElement element : this ) {
        // Render the event as a canonical string so identical events collapse together.
        String indel;
        switch ( element.getType() ) {
            case INSERTION:
                indel = "+"+element.getEventBases();
                break;
            case DELETION:
                indel = getDeletionString(element.getEventLength(),refBases);
                break;
            case NOEVENT:
                continue; // no-event reads do not contribute an entry
            default:
                throw new StingException("Unknown event type encountered: "+element.getType());
        }
        Integer seen = counts.get(indel);
        counts.put(indel, seen == null ? 1 : seen + 1);
    }

    List<Pair<String,Integer>> result = new ArrayList<Pair<String,Integer>>(counts.size());
    for ( Map.Entry<String,Integer> entry : counts.entrySet() )
        result.add(new Pair<String,Integer>(entry.getKey(), entry.getValue()));
    return result;
}
/**
 * Builds string representation of the deletion event. If refBases is null, the representation will be
 * "<length>D" (e.g. "5D"); if the reference bases are provided, a verbose representation (e.g. "-AAC")
 * will be generated. NOTE: refBases must start with the base prior to the actual deletion (i.e. deleted
 * base(s) are refBase[1], refBase[2], ...), and the length of the passed array must be sufficient to accomodate the
 * deletion length (i.e. size of refBase must be at least length+1).
 * @param length number of deleted bases
 * @param refBases reference bases anchored one base before the deletion, or null
 * @return the short ("5D") or verbose ("-AAC") representation of the deletion
 */
static String getDeletionString(int length, byte[] refBases) {
    if ( refBases != null ) {
        // Verbose form: the deleted bases are refBases[1..length]; refBases[0] is the anchor base.
        return "-"+new String(refBases,1,length).toUpperCase();
    }
    // Without reference bases we can only report something like "5D".
    return Integer.toString(length)+"D";
}
/**
 * Get an array of the mapping qualities, one per pileup element, in pileup order.
 * Note this call costs O(n) and allocates a fresh array each time.
 *
 * @return array of mapping qualities, narrowed to byte
 */
@Override
public byte[] getMappingQuals() {
    byte[] quals = new byte[size()];
    int next = 0;
    for ( ExtendedEventPileupElement element : this )
        quals[next++] = (byte)element.getRead().getMappingQuality();
    return quals;
}
}

View File

@ -1,561 +0,0 @@
package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.gatk.iterators.IterableIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.BaseUtils;
import java.util.*;
import net.sf.samtools.SAMRecord;
/**
 * Version two file implementing pileups of bases in reads at a locus.
 *
 * Wraps a list of (read, offset) PileupElements at a single GenomeLoc and caches
 * the pileup size, deletion count, and MQ0-read count at construction time.
 *
 * @author Mark DePristo
 */
public class UnifiedReadBackedPileup implements ReadBackedPileup {
    private GenomeLoc loc = null;           // the genomic location this pileup covers
    private List<PileupElement> pileup = null; // the piled-up (read, offset) elements
    private int size = 0;                   // cached value of the size of the pileup
    private int nDeletions = 0;             // cached value of the number of deletions
    private int nMQ0Reads = 0;              // cached value of the number of MQ0 reads

    /**
     * Create a new version of a read backed pileup at loc, using the reads and their corresponding
     * offsets. This pileup will contain a list, in order of the reads, of the piled bases at
     * reads[i] for all i in offsets. Does not make a copy of the data, so it's not safe to
     * go changing the reads.
     *
     * @param loc the locus of the pileup
     * @param reads the reads covering loc, parallel to offsets
     * @param offsets per-read offsets of loc within each read
     */
    public UnifiedReadBackedPileup(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets ) {
        this(loc, readsOffsets2Pileup(reads, offsets));
    }

    // Convenience form: every read shares the same offset.
    public UnifiedReadBackedPileup(GenomeLoc loc, List<SAMRecord> reads, int offset ) {
        this(loc, readsOffsets2Pileup(reads, offset));
    }

    /**
     * Create a new version of a read backed pileup at loc without any aligned reads
     *
     */
    public UnifiedReadBackedPileup(GenomeLoc loc ) {
        this(loc, new ArrayList<PileupElement>(0));
    }

    /**
     * Create a new version of a read backed pileup at loc, using the reads and their corresponding
     * offsets. This lower level constructure assumes pileup is well-formed and merely keeps a
     * pointer to pileup. Don't go changing the data in pileup.
     *
     */
    public UnifiedReadBackedPileup(GenomeLoc loc, List<PileupElement> pileup ) {
        if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup");
        if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup");

        this.loc = loc;
        this.pileup = pileup;

        calculatedCachedData();
    }

    /**
     * Optimization of above constructor where all of the cached data is provided
     * (caller is trusted: size/nDeletions/nMQ0Reads are NOT re-verified here).
     * @param loc the locus of the pileup
     * @param pileup the pre-built pileup elements
     */
    public UnifiedReadBackedPileup(GenomeLoc loc, List<PileupElement> pileup, int size, int nDeletions, int nMQ0Reads ) {
        if ( loc == null ) throw new StingException("Illegal null genomeloc in UnifiedReadBackedPileup");
        if ( pileup == null ) throw new StingException("Illegal null pileup in UnifiedReadBackedPileup");

        this.loc = loc;
        this.pileup = pileup;
        this.size = size;
        this.nDeletions = nDeletions;
        this.nMQ0Reads = nMQ0Reads;
    }

    /**
     * Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront,
     * so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling
     * sizes, nDeletion, etc. over and over potentially.
     */
    private void calculatedCachedData() {
        size = 0;
        nDeletions = 0;
        nMQ0Reads = 0;

        for ( PileupElement p : this ) {
            size++;
            if ( p.isDeletion() ) {
                nDeletions++;
            }
            if ( p.getRead().getMappingQuality() == 0 ) {
                nMQ0Reads++;
            }
        }
    }

    /**
     * Helper routine for converting reads and offset lists to a PileupElement list.
     *
     * @param reads the reads, parallel to offsets
     * @param offsets the per-read offsets
     * @return a fresh list of PileupElements, one per (read, offset) pair
     */
    private static ArrayList<PileupElement> readsOffsets2Pileup(List<SAMRecord> reads, List<Integer> offsets ) {
        if ( reads == null ) throw new StingException("Illegal null read list in UnifiedReadBackedPileup");
        if ( offsets == null ) throw new StingException("Illegal null offsets list in UnifiedReadBackedPileup");
        if ( reads.size() != offsets.size() ) throw new StingException("Reads and offset lists have different sizes!");

        ArrayList<PileupElement> pileup = new ArrayList<PileupElement>(reads.size());
        for ( int i = 0; i < reads.size(); i++ ) {
            pileup.add( new PileupElement( reads.get(i), offsets.get(i) ) );
        }

        return pileup;
    }

    /**
     * Helper routine for converting reads and a single offset to a PileupElement list.
     *
     * @param reads the reads
     * @param offset the single offset shared by every read
     * @return a fresh list of PileupElements
     */
    private static ArrayList<PileupElement> readsOffsets2Pileup(List<SAMRecord> reads, int offset ) {
        if ( reads == null ) throw new StingException("Illegal null read list in UnifiedReadBackedPileup");
        if ( offset < 0 ) throw new StingException("Illegal offset < 0 UnifiedReadBackedPileup");

        ArrayList<PileupElement> pileup = new ArrayList<PileupElement>(reads.size());
        for ( int i = 0; i < reads.size(); i++ ) {
            pileup.add( new PileupElement( reads.get(i), offset ) );
        }

        return pileup;
    }

    // --------------------------------------------------------
    //
    // Special 'constructors'
    //
    // --------------------------------------------------------

    /**
     * Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup.  Note that this
     * does not copy the data, so both ReadBackedPileups should not be changed.  Doesn't make an unnecessary copy
     * of the pileup (just returns this) if there are no deletions in the pileup.
     *
     * @return pileup without deletion elements (possibly this very pileup)
     */
    @Override
    public ReadBackedPileup getPileupWithoutDeletions() {
        if ( getNumberOfDeletions() > 0 ) {
            ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
            for ( PileupElement p : pileup ) {
                if ( !p.isDeletion() ) {
                    filteredPileup.add(p);
                }
            }
            return new UnifiedReadBackedPileup(loc, filteredPileup);
        } else {
            return this;
        }
    }

    /**
     * Returns a new ReadBackedPileup where only one read from an overlapping read
     * pair is retained.  If the two reads in question disagree to their basecall,
     * neither read is retained.  If they agree on the base, the read with the higher
     * quality observation is retained
     *
     * @return the newly filtered pileup
     */
    @Override
    public ReadBackedPileup getOverlappingFragmentFilteredPileup() {
        // Keyed by read name: mates of a pair share the same name, so a second
        // occurrence of a name signals an overlapping fragment at this locus.
        Map<String, PileupElement> filteredPileup = new HashMap<String, PileupElement>();

        for ( PileupElement p : pileup ) {
            String readName = p.getRead().getReadName();

            // if we've never seen this read before, life is good
            if (!filteredPileup.containsKey(readName)) {
                filteredPileup.put(readName, p);
            } else {
                PileupElement existing = filteredPileup.get(readName);

                // if the reads disagree at this position, throw them both out.  Otherwise
                // keep the element with the higher quality score
                if (existing.getBase() != p.getBase()) {
                    filteredPileup.remove(readName);
                } else {
                    if (existing.getQual() < p.getQual()) {
                        filteredPileup.put(readName, p);
                    }
                }
            }
        }

        return new UnifiedReadBackedPileup(loc, new ArrayList<PileupElement>(filteredPileup.values()));
    }

    /**
     * Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup.  Note that this
     * does not copy the data, so both ReadBackedPileups should not be changed.  Doesn't make an unnecessary copy
     * of the pileup (just returns this) if there are no MQ0 reads in the pileup.
     *
     * @return pileup without MQ0 reads (possibly this very pileup)
     */
    @Override
    public ReadBackedPileup getPileupWithoutMappingQualityZeroReads() {
        if ( getNumberOfMappingQualityZeroReads() > 0 ) {
            ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
            for ( PileupElement p : pileup ) {
                if ( p.getRead().getMappingQuality() > 0 ) {
                    filteredPileup.add(p);
                }
            }
            return new UnifiedReadBackedPileup(loc, filteredPileup);
        } else {
            return this;
        }
    }

    /** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
     * reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.
     * Deletion elements have no base quality, so they pass the base-quality test automatically.
     * @param minBaseQ minimum base quality (inclusive)
     * @param minMapQ minimum mapping quality (inclusive)
     * @return a newly allocated, filtered pileup
     */
    @Override
    public ReadBackedPileup getBaseAndMappingFilteredPileup( int minBaseQ, int minMapQ ) {
        ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();

        for ( PileupElement p : pileup ) {
            if ( p.getRead().getMappingQuality() >= minMapQ && (p.isDeletion() || p.getQual() >= minBaseQ) ) {
                filteredPileup.add(p);
            }
        }

        return new UnifiedReadBackedPileup(loc, filteredPileup);
    }

    /** Returns subset of this pileup that contains only bases with quality >= minBaseQ.
     * This method allocates and returns a new instance of ReadBackedPileup.
     * @param minBaseQ minimum base quality (inclusive)
     * @return a newly allocated, filtered pileup
     */
    @Override
    public ReadBackedPileup getBaseFilteredPileup( int minBaseQ ) {
        return getBaseAndMappingFilteredPileup(minBaseQ, -1);
    }

    /** Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ.
     * This method allocates and returns a new instance of ReadBackedPileup.
     * @param minMapQ minimum mapping quality (inclusive)
     * @return a newly allocated, filtered pileup
     */
    @Override
    public ReadBackedPileup getMappingFilteredPileup( int minMapQ ) {
        return getBaseAndMappingFilteredPileup(-1, minMapQ);
    }

    /**
     * Returns a pileup randomly downsampled to the desiredCoverage.
     * Returns this very pileup (no copy) when it is already small enough.
     * NOTE: a fresh Random is seeded per call, so results are not reproducible.
     *
     * @param desiredCoverage maximum number of elements to retain
     * @return this pileup or a newly allocated downsampled one
     */
    @Override
    public ReadBackedPileup getDownsampledPileup(int desiredCoverage) {
        if ( size() <= desiredCoverage )
            return this;

        // randomly choose numbers corresponding to positions in the reads list
        Random generator = new Random();
        TreeSet<Integer> positions = new TreeSet<Integer>();
        for ( int i = 0; i < desiredCoverage; /* no update */ ) {
            if ( positions.add(generator.nextInt(pileup.size())) )
                i++;
        }

        // NOTE(review): raw Iterator forces the cast below; Iterator<Integer> (or a
        // typed for-each over positions) would be cleaner.
        Iterator positionIter = positions.iterator();
        ArrayList<PileupElement> downsampledPileup = new ArrayList<PileupElement>();

        while ( positionIter.hasNext() ) {
            int nextReadToKeep = (Integer)positionIter.next();
            downsampledPileup.add(pileup.get(nextReadToKeep));
        }

        return new UnifiedReadBackedPileup(getLocation(), downsampledPileup);
    }

    /**
     * Gets the distinct sample names contributing to this pileup; reads without a
     * read group (or without a sample annotation) contribute a null entry.
     * @return set of sample names (may contain null)
     */
    @Override
    public Collection<String> getSamples() {
        Collection<String> sampleNames = new HashSet<String>();
        for(PileupElement p: this) {
            SAMRecord read = p.getRead();
            String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
            sampleNames.add(sampleName);
        }
        return sampleNames;
    }

    /**
     * Gets the subset of this pileup for the given sample; a null sampleName selects
     * reads with no read group or no sample annotation.
     * @param sampleName the sample to select, or null
     * @return a new pileup for the sample, or null if no reads match
     */
    @Override
    public ReadBackedPileup getPileupForSample(String sampleName) {
        List<PileupElement> filteredPileup = new ArrayList<PileupElement>();
        for(PileupElement p: this) {
            SAMRecord read = p.getRead();
            if(sampleName != null) {
                if(read.getReadGroup() != null && sampleName.equals(read.getReadGroup().getSample()))
                    filteredPileup.add(p);
            }
            else {
                if(read.getReadGroup() == null || read.getReadGroup().getSample() == null)
                    filteredPileup.add(p);
            }
        }
        return filteredPileup.size()>0 ? new UnifiedReadBackedPileup(loc,filteredPileup) : null;
    }

    // --------------------------------------------------------
    //
    // iterators
    //
    // --------------------------------------------------------

    /**
     * The best way to access PileupElements where you only care about the bases and quals in the pileup.
     *
     * for (PileupElement p : this) { doSomething(p); }
     *
     * Provides efficient iteration of the data.
     *
     * @return an iterator over the underlying pileup elements
     */
    @Override
    public Iterator<PileupElement> iterator() {
        return pileup.iterator();
    }

    /**
     * The best way to access PileupElements where you only care not only about bases and quals in the pileup
     * but also need access to the index of the pileup element in the pile.
     *
     * for (ExtendedPileupElement p : this) { doSomething(p); }
     *
     * Provides efficient iteration of the data.
     *
     * @return an iterable over ExtendedPileupElements carrying their pileup index
     */
    // todo -- reimplement efficiently
    // todo -- why is this public?
    @Override
    public IterableIterator<ExtendedPileupElement> extendedForeachIterator() {
        // Materializes a full parallel list just to attach indices -- O(n) extra space.
        ArrayList<ExtendedPileupElement> x = new ArrayList<ExtendedPileupElement>(size());
        int i = 0;
        for ( PileupElement pile : this ) {
            x.add(new ExtendedPileupElement(pile.getRead(), pile.getOffset(), i++, this));
        }

        return new IterableIterator<ExtendedPileupElement>(x.iterator());
    }

    /**
     * Simple useful routine to count the number of deletion bases in this pileup
     *
     * @return the cached number of deletion elements
     */
    @Override
    public int getNumberOfDeletions() {
        return nDeletions;
    }

    // Returns the cached count of reads with mapping quality zero.
    @Override
    public int getNumberOfMappingQualityZeroReads() {
        return nMQ0Reads;
    }

    /**
     * @return the number of elements in this pileup (cached at construction)
     */
    @Override
    public int size() {
        return size;
    }

    /**
     * @return the location of this pileup
     */
    @Override
    public GenomeLoc getLocation() {
        return loc;
    }

    /**
     * Get counts of A, C, G, T in order, which returns a int[4] vector with counts according
     * to BaseUtils.simpleBaseToBaseIndex for each base.
     *
     * @return int[4] with per-base counts; deletion elements and non-regular bases are skipped
     */
    @Override
    public int[] getBaseCounts() {
        int[] counts = new int[4];

        for ( PileupElement pile : this ) {
            // skip deletion sites
            if ( ! pile.isDeletion() ) {
                int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase());
                if (index != -1)
                    counts[index]++;
            }
        }

        return counts;
    }

    /**
     * Somewhat expensive routine that returns true if any base in the pileup has secondary bases annotated
     * @return true if at least one non-deletion element carries a regular secondary base
     */
    @Override
    public boolean hasSecondaryBases() {
        for ( PileupElement pile : this ) {
            // skip deletion sites
            if ( ! pile.isDeletion() && BaseUtils.isRegularBase((char)pile.getSecondBase()) )
                return true;
        }
        return false;
    }

    @Override
    public String getPileupString(char ref) {
        // In the pileup format, each line represents a genomic position, consisting of chromosome name,
        // coordinate, reference base, read bases, read qualities and alignment mapping qualities.
        return String.format("%s %s %c %s %s",
                getLocation().getContig(), getLocation().getStart(),    // chromosome name and coordinate
                ref,                                                     // reference base
                new String(getBases()),
                getQualsString());
    }

    // --------------------------------------------------------
    //
    // Convenience functions that may be slow
    //
    // --------------------------------------------------------

    /**
     * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
     * @return a newly allocated list of the reads
     */
    @Override
    public List<SAMRecord> getReads() {
        List<SAMRecord> reads = new ArrayList<SAMRecord>(size());
        for ( PileupElement pile : this ) { reads.add(pile.getRead()); }
        return reads;
    }

    /**
     * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
     * @return a newly allocated list of the offsets
     */
    @Override
    public List<Integer> getOffsets() {
        List<Integer> offsets = new ArrayList<Integer>(size());
        for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); }
        return offsets;
    }

    /**
     * Returns an array of the bases in this pileup. Note this call costs O(n) and allocates fresh array each time
     * @return array of bases indexed by pileup position
     */
    @Override
    public byte[] getBases() {
        byte[] v = new byte[size()];
        for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getBase(); }
        return v;
    }

    /**
     * Returns an array of the secondary bases in this pileup. Note this call costs O(n) and allocates fresh array each time
     * @return array of secondary bases indexed by pileup position
     */
    @Override
    public byte[] getSecondaryBases() {
        byte[] v = new byte[size()];
        for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getSecondBase(); }
        return v;
    }

    /**
     * Returns an array of the quals in this pileup. Note this call costs O(n) and allocates fresh array each time
     * @return array of base qualities indexed by pileup position
     */
    @Override
    public byte[] getQuals() {
        byte[] v = new byte[size()];
        for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getQual(); }
        return v;
    }

    /**
     * Get an array of the mapping qualities
     * @return array of mapping qualities (narrowed to byte) indexed by pileup position
     */
    @Override
    public byte[] getMappingQuals() {
        byte[] v = new byte[size()];
        for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = (byte)pile.getRead().getMappingQuality(); }
        return v;
    }

    //
    // Private functions for printing pileups
    //
    private String getMappingQualsString() {
        return quals2String(getMappingQuals());
    }

    // Renders qualities in FASTQ-style Phred+33 encoding; values are clamped to 63.
    static String quals2String( byte[] quals ) {
        StringBuilder qualStr = new StringBuilder();
        for ( int qual : quals ) {
            qual = Math.min(qual, 63);          // todo: fixme, this isn't a good idea
            char qualChar = (char) (33 + qual);     // todo: warning, this is illegal for qual > 63
            qualStr.append(qualChar);
        }

        return qualStr.toString();
    }

    private String getQualsString() {
        return quals2String(getQuals());
    }

    // Location followed by the base of each element, e.g. "chr1:100: ACGT".
    @Override
    public String toString() {
        StringBuilder s = new StringBuilder();
        s.append(getLocation());
        s.append(": ");
        for ( PileupElement p : this ) {
            s.append((char)p.getBase());
        }
        return s.toString();
    }
}