Rethinking DownsamplingLocusIteratorByState with a flattened read structure. Samples are kept

independent while processing, and only merged back in a priority queue if necessary in a special
variant of the ReadBackedPileup.  This code is not live yet except in the case of naive deduping.
Downsampling by sample temporarily disabled, and the ReadBackedPileup variant is sketchy and
not well integrated with StratifiedAlignmentContext or the walkers.  Cleanup to follow.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3540 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-06-13 01:47:02 +00:00
parent 804facb0cc
commit c3b68cc58d
19 changed files with 2464 additions and 1067 deletions

View File

@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker; import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
import java.util.*; import java.util.*;
@ -80,7 +81,7 @@ public class AlignmentContext {
if ( skippedBases < 0 ) throw new StingException("BUG: skippedBases is -1 in Alignment context"); if ( skippedBases < 0 ) throw new StingException("BUG: skippedBases is -1 in Alignment context");
this.loc = loc; this.loc = loc;
this.basePileup = new ReadBackedPileup(loc, reads, offsets); this.basePileup = new UnifiedReadBackedPileup(loc, reads, offsets);
this.skippedBases = skippedBases; this.skippedBases = skippedBases;
} }

View File

@ -90,9 +90,9 @@ public class StratifiedAlignmentContext {
int index = type.ordinal(); int index = type.ordinal();
if ( contexts[index] == null ) { if ( contexts[index] == null ) {
if ( isExtended ) { if ( isExtended ) {
contexts[index] = new AlignmentContext(loc , new ReadBackedExtendedEventPileup(loc, (ArrayList<ExtendedEventPileupElement>)((ArrayList<? extends PileupElement>)getPileupElements(type)))); contexts[index] = new AlignmentContext(loc , new UnifiedReadBackedExtendedEventPileup(loc, (ArrayList<ExtendedEventPileupElement>)((ArrayList<? extends PileupElement>)getPileupElements(type))));
} else { } else {
contexts[index] = new AlignmentContext(loc, new ReadBackedPileup(loc, getPileupElements(type))); contexts[index] = new AlignmentContext(loc, new UnifiedReadBackedPileup(loc, getPileupElements(type)));
} }
} }
return contexts[index]; return contexts[index];
@ -300,7 +300,7 @@ public class StratifiedAlignmentContext {
// dirty trick below. generics do not allow to cast pe (ArrayList<PileupElement>) directly to ArrayList<ExtendedEventPileupElement>, // dirty trick below. generics do not allow to cast pe (ArrayList<PileupElement>) directly to ArrayList<ExtendedEventPileupElement>,
// so we first cast to "? extends" wildcard, then to what we actually need. // so we first cast to "? extends" wildcard, then to what we actually need.
if ( isExtended ) return new AlignmentContext(loc, new ReadBackedExtendedEventPileup(loc, (ArrayList< ExtendedEventPileupElement>)((ArrayList<? extends PileupElement>)pe)) ); if ( isExtended ) return new AlignmentContext(loc, new UnifiedReadBackedExtendedEventPileup(loc, (ArrayList< ExtendedEventPileupElement>)((ArrayList<? extends PileupElement>)pe)) );
else return new AlignmentContext(loc, new ReadBackedPileup(loc,pe)); else return new AlignmentContext(loc, new UnifiedReadBackedPileup(loc,pe));
} }
} }

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.MergingIterator; import org.broadinstitute.sting.utils.collections.MergingIterator;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker; import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker;
import java.util.*; import java.util.*;
@ -136,7 +137,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
// calculate the number of skipped bases, and update lastLoc so we can do that again in the next() // calculate the number of skipped bases, and update lastLoc so we can do that again in the next()
long skippedBases = getSkippedBases( rodSite ); long skippedBases = getSkippedBases( rodSite );
lastLoc = site; lastLoc = site;
return new AlignmentContext(site, new ReadBackedPileup(site), skippedBases); return new AlignmentContext(site, new UnifiedReadBackedPileup(site), skippedBases);
} }
public LocusOverflowTracker getLocusOverflowTracker() { public LocusOverflowTracker getLocusOverflowTracker() {

View File

@ -34,10 +34,7 @@ import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.*;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import java.util.*; import java.util.*;
@ -308,11 +305,15 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
} }
public void printState() { public void printState() {
for ( SAMRecordState state : readStates ) { for(String sampleName: sampleNames) {
logger.debug(String.format("printState():")); Iterator<SAMRecordState> iterator = readStates.iteratorForSample(sampleName);
SAMRecord read = state.getRead(); while(iterator.hasNext()) {
int offset = state.getReadOffset(); SAMRecordState state = iterator.next();
logger.debug(String.format(" read: %s(%d)=%s, cigar=%s", read.getReadName(), offset, (char)read.getReadBases()[offset], read.getCigarString())); logger.debug(String.format("printState():"));
SAMRecord read = state.getRead();
int offset = state.getReadOffset();
logger.debug(String.format(" read: %s(%d)=%s, cigar=%s", read.getReadName(), offset, (char)read.getReadBases()[offset], read.getCigarString()));
}
} }
} }
@ -341,7 +342,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
private void lazyLoadNextAlignmentContext() { private void lazyLoadNextAlignmentContext() {
while(nextAlignmentContext == null && readStates.hasNext()) { while(nextAlignmentContext == null && readStates.hasNext()) {
// this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref: // this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref:
collectPendingReads(readInfo.getMaxReadsAtLocus()); readStates.collectPendingReads();
int size = 0; int size = 0;
int nDeletions = 0; int nDeletions = 0;
@ -355,93 +356,112 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
// In this case, the subsequent call to next() will emit the normal pileup at the current base // In this case, the subsequent call to next() will emit the normal pileup at the current base
// and shift the position. // and shift the position.
if (readInfo.generateExtendedEvents() && hasExtendedEvents) { if (readInfo.generateExtendedEvents() && hasExtendedEvents) {
ArrayList<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size()); Map<String,ReadBackedExtendedEventPileup> fullExtendedEventPileup = new HashMap<String,ReadBackedExtendedEventPileup>();
int maxDeletionLength = 0;
for ( SAMRecordState state : readStates ) {
if ( state.hadIndel() ) {
size++;
if ( state.getEventBases() == null ) {
nDeletions++;
maxDeletionLength = Math.max(maxDeletionLength,state.getEventLength());
}
else nInsertions++;
indelPile.add ( new ExtendedEventPileupElement(state.getRead(),
state.getReadEventStartOffset(),
state.getEventLength(),
state.getEventBases()) );
} else {
if ( state.getCurrentCigarOperator() != CigarOperator.N ) {
// this read has no indel associated with the previous position on the ref;
// we count this read in only if it has actual bases, not N span...
if ( state.getCurrentCigarOperator() != CigarOperator.D || readInfo.includeReadsWithDeletionAtLoci() ) {
// if cigar operator is D but the read has no extended event reported (that's why we ended
// up in this branch), it means that we are currently inside a deletion that started earlier;
// we count such reads (with a longer deletion spanning over a deletion at the previous base we are
// about to report) only if includeReadsWithDeletionAtLoci is true.
size++;
indelPile.add ( new ExtendedEventPileupElement(state.getRead(),
state.getReadOffset()-1,
-1) // length=-1 --> noevent
);
}
}
}
if ( state.getRead().getMappingQuality() == 0 ) {
nMQ0Reads++;
}
}
hasExtendedEvents = false; // we are done with extended events prior to current ref base
SAMRecordState our1stState = readStates.getFirst(); SAMRecordState our1stState = readStates.getFirst();
// get current location on the reference and decrement it by 1: the indels we just stepped over // get current location on the reference and decrement it by 1: the indels we just stepped over
// are associated with the *previous* reference base // are associated with the *previous* reference base
GenomeLoc loc = GenomeLocParser.incPos(our1stState.getLocation(),-1); GenomeLoc loc = GenomeLocParser.incPos(our1stState.getLocation(),-1);
for(String sampleName: sampleNames) {
Iterator<SAMRecordState> iterator = readStates.iteratorForSample(sampleName);
ArrayList<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size());
size = 0;
nDeletions = 0;
nInsertions = 0;
nMQ0Reads = 0;
int maxDeletionLength = 0;
while(iterator.hasNext()) {
SAMRecordState state = iterator.next();
if ( state.hadIndel() ) {
size++;
if ( state.getEventBases() == null ) {
nDeletions++;
maxDeletionLength = Math.max(maxDeletionLength,state.getEventLength());
}
else nInsertions++;
indelPile.add ( new ExtendedEventPileupElement(state.getRead(),
state.getReadEventStartOffset(),
state.getEventLength(),
state.getEventBases()) );
} else {
if ( state.getCurrentCigarOperator() != CigarOperator.N ) {
// this read has no indel associated with the previous position on the ref;
// we count this read in only if it has actual bases, not N span...
if ( state.getCurrentCigarOperator() != CigarOperator.D || readInfo.includeReadsWithDeletionAtLoci() ) {
// if cigar operator is D but the read has no extended event reported (that's why we ended
// up in this branch), it means that we are currently inside a deletion that started earlier;
// we count such reads (with a longer deletion spanning over a deletion at the previous base we are
// about to report) only if includeReadsWithDeletionAtLoci is true.
size++;
indelPile.add ( new ExtendedEventPileupElement(state.getRead(),
state.getReadOffset()-1,
-1) // length=-1 --> noevent
);
}
}
}
if ( state.getRead().getMappingQuality() == 0 ) {
nMQ0Reads++;
}
if( indelPile.size() != 0 ) fullExtendedEventPileup.put(sampleName,new UnifiedReadBackedExtendedEventPileup(loc,indelPile,size,maxDeletionLength,nDeletions,nInsertions,nMQ0Reads));
}
}
hasExtendedEvents = false; // we are done with extended events prior to current ref base
// System.out.println("Indel(s) at "+loc); // System.out.println("Indel(s) at "+loc);
// for ( ExtendedEventPileupElement pe : indelPile ) { if ( pe.isIndel() ) System.out.println(" "+pe.toString()); } // for ( ExtendedEventPileupElement pe : indelPile ) { if ( pe.isIndel() ) System.out.println(" "+pe.toString()); }
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileup(loc, indelPile, size, maxDeletionLength, nInsertions, nDeletions, nMQ0Reads)); nextAlignmentContext = new AlignmentContext(loc, new SampleSplitReadBackedExtendedEventPileup(loc, fullExtendedEventPileup));
} else { } else {
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size()); GenomeLoc location = getLocation();
Map<String,ReadBackedPileup> fullPileup = new HashMap<String,ReadBackedPileup>();
// todo -- performance problem -- should be lazy, really // todo -- performance problem -- should be lazy, really
GenomeLoc location = getLocation(); for(String sampleName: sampleNames) {
for ( SAMRecordState state : readStates ) { Iterator<SAMRecordState> iterator = readStates.iteratorForSample(sampleName);
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) { ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
if ( filterRead(state.getRead(), location.getStart(), filters ) ) {
discarded_bases++;
//printStatus("Adaptor bases", discarded_adaptor_bases);
continue;
} else {
observed_bases++;
pile.add(new PileupElement(state.getRead(), state.getReadOffset()));
size++;
}
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
size++;
pile.add(new PileupElement(state.getRead(), -1));
nDeletions++;
}
// todo -- this looks like a bug w.r.t. including reads with deletion at loci -- MAD 05/27/10 size = 0;
if ( state.getRead().getMappingQuality() == 0 ) { nDeletions = 0;
nMQ0Reads++; nInsertions = 0;
nMQ0Reads = 0;
while(iterator.hasNext()) {
SAMRecordState state = iterator.next();
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
if ( filterRead(state.getRead(), location.getStart(), filters ) ) {
discarded_bases++;
//printStatus("Adaptor bases", discarded_adaptor_bases);
continue;
} else {
observed_bases++;
pile.add(new PileupElement(state.getRead(), state.getReadOffset()));
size++;
}
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
size++;
pile.add(new PileupElement(state.getRead(), -1));
nDeletions++;
}
// todo -- this looks like a bug w.r.t. including reads with deletion at loci -- MAD 05/27/10
if ( state.getRead().getMappingQuality() == 0 ) {
nMQ0Reads++;
}
} }
if( pile.size() != 0 ) fullPileup.put(sampleName,new UnifiedReadBackedPileup(location,pile,size,nDeletions,nMQ0Reads));
} }
updateReadStates(); // critical - must be called after we get the current state offsets and location updateReadStates(); // critical - must be called after we get the current state offsets and location
// if we got reads with non-D/N over the current position, we are done // if we got reads with non-D/N over the current position, we are done
if ( pile.size() != 0 ) nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileup(location, pile, size, nDeletions, nMQ0Reads)); if ( !fullPileup.isEmpty() ) nextAlignmentContext = new AlignmentContext(location, new SampleSplitReadBackedPileup(location, fullPileup));
} }
} }
} }
private void collectPendingReads(int maxReads) {
readStates.collectPendingReads();
}
// fast testing of position // fast testing of position
private boolean readIsPastCurrentPosition(SAMRecord read) { private boolean readIsPastCurrentPosition(SAMRecord read) {
if ( readStates.isEmpty() ) if ( readStates.isEmpty() )
@ -475,18 +495,20 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
} }
private void updateReadStates() { private void updateReadStates() {
Iterator<SAMRecordState> it = readStates.iterator(); for(String sampleName: sampleNames) {
while ( it.hasNext() ) { Iterator<SAMRecordState> it = readStates.iteratorForSample(sampleName);
SAMRecordState state = it.next(); while ( it.hasNext() ) {
CigarOperator op = state.stepForwardOnGenome(); SAMRecordState state = it.next();
if ( state.hadIndel() && readInfo.generateExtendedEvents() ) hasExtendedEvents = true; CigarOperator op = state.stepForwardOnGenome();
else { if ( state.hadIndel() && readInfo.generateExtendedEvents() ) hasExtendedEvents = true;
// we discard the read only when we are past its end AND indel at the end of the read (if any) was else {
// already processed. Keeping the read state that retunred null upon stepForwardOnGenome() is safe // we discard the read only when we are past its end AND indel at the end of the read (if any) was
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag. // already processed. Keeping the read state that retunred null upon stepForwardOnGenome() is safe
if ( op == null ) { // we've stepped off the end of the object // as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
//if (DEBUG) logger.debug(String.format(" removing read %s at %d", state.getRead().getReadName(), state.getRead().getAlignmentStart())); if ( op == null ) { // we've stepped off the end of the object
it.remove(); //if (DEBUG) logger.debug(String.format(" removing read %s at %d", state.getRead().getReadName(), state.getRead().getAlignmentStart()));
it.remove();
}
} }
} }
} }
@ -512,16 +534,18 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
return this.overflowTracker; return this.overflowTracker;
} }
private class ReadStateManager implements Iterable<SAMRecordState> { private class ReadStateManager {
private final PeekableIterator<SAMRecord> iterator; private final PeekableIterator<SAMRecord> iterator;
private final DownsamplingMethod downsamplingMethod; private final DownsamplingMethod downsamplingMethod;
private final Map<String,Collection<SAMRecord>> aggregatorsBySampleName = new HashMap<String,Collection<SAMRecord>>(); private final ReadSelector firstReadSelector;
private final SamplePartitioner samplePartitioner;
private final Map<String,Deque<SAMRecordState>> readStatesBySample = new HashMap<String,Deque<SAMRecordState>>();
private final int targetCoverage; private final int targetCoverage;
private final int maxReadsAtLocus; private final int maxReadsAtLocus;
private final Map<String,Deque<List<SAMRecordState>>> readStatesBySample;
private int totalReadStatesInHanger = 0; private int totalReadStatesInHanger = 0;
/** /**
@ -547,104 +571,59 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
this.targetCoverage = Integer.MAX_VALUE; this.targetCoverage = Integer.MAX_VALUE;
} }
this.maxReadsAtLocus = maxReadsAtLocus; this.maxReadsAtLocus = maxReadsAtLocus;
this.readStatesBySample = new HashMap<String,Deque<List<SAMRecordState>>>();
if(downsamplingMethod.type == DownsampleType.NONE) { samplePartitioner = new SamplePartitioner(sampleNames);
aggregatorsBySampleName.put(null,new ArrayList<SAMRecord>()); for(String sampleName: sampleNames)
readStatesBySample.put(null,new LinkedList<List<SAMRecordState>>()); readStatesBySample.put(sampleName,new LinkedList<SAMRecordState>());
}
else if(downsamplingMethod.type == DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR) { ReadSelector primaryReadSelector;
aggregatorsBySampleName.put(null,new ReservoirDownsampler<SAMRecord>(targetCoverage)); if(downsamplingMethod.type == DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR) {
readStatesBySample.put(null,new LinkedList<List<SAMRecordState>>()); primaryReadSelector = new NRandomReadSelector(samplePartitioner,targetCoverage);
}
else {
for(String sampleName: sampleNames) {
aggregatorsBySampleName.put(sampleName,new ReservoirDownsampler<SAMRecord>(targetCoverage));
readStatesBySample.put(sampleName,new LinkedList<List<SAMRecordState>>());
}
} }
else
primaryReadSelector = samplePartitioner;
firstReadSelector = maxReadsAtLocus!=Integer.MAX_VALUE ? new FirstNReadSelector(primaryReadSelector,maxReadsAtLocus) : primaryReadSelector;
} }
public Iterator<SAMRecordState> iterator() { public Iterator<SAMRecordState> iteratorForSample(final String sampleName) {
return new Iterator<SAMRecordState>() { return new Iterator<SAMRecordState>() {
private final Iterator<Iterator<List<SAMRecordState>>> sampleIterators; private Iterator<SAMRecordState> wrappedIterator = readStatesBySample.get(sampleName).iterator();
private Iterator<List<SAMRecordState>> sampleIterator;
private List<SAMRecordState> currentAlignmentStart;
private Iterator<SAMRecordState> alignmentStartIterator;
private SAMRecordState nextReadState;
private int readsInHanger = totalReadStatesInHanger;
{
List<Iterator<List<SAMRecordState>>> sampleIteratorList = new LinkedList<Iterator<List<SAMRecordState>>>();
for(Deque<List<SAMRecordState>> hanger: readStatesBySample.values())
sampleIteratorList.add(hanger.iterator());
sampleIterators = sampleIteratorList.iterator();
advance();
}
public boolean hasNext() { public boolean hasNext() {
return readsInHanger > 0; return wrappedIterator.hasNext();
} }
public SAMRecordState next() { public SAMRecordState next() {
advance(); return wrappedIterator.next();
if(nextReadState==null) throw new NoSuchElementException("reader is out of elements");
try {
return nextReadState;
}
finally {
readsInHanger--;
nextReadState = null;
}
} }
public void remove() { public void remove() {
if(alignmentStartIterator == null) wrappedIterator.remove();
throw new StingException("Cannot remove read -- iterator is in an invalid state.");
alignmentStartIterator.remove();
if(currentAlignmentStart.isEmpty())
sampleIterator.remove();
totalReadStatesInHanger--; totalReadStatesInHanger--;
} }
private void advance() {
// nextReadState != null indicates that we haven't returned this value from the next() method yet.
if(nextReadState != null)
return;
while(alignmentStartIterator!=null&&alignmentStartIterator.hasNext()) {
nextReadState = alignmentStartIterator.next();
}
while(nextReadState==null&&sampleIterator!=null&&sampleIterator.hasNext()) {
currentAlignmentStart = sampleIterator.next();
alignmentStartIterator = currentAlignmentStart!=null ? currentAlignmentStart.iterator() : null;
nextReadState = alignmentStartIterator!=null&&alignmentStartIterator.hasNext() ? alignmentStartIterator.next() : null;
}
while(nextReadState==null&&sampleIterators.hasNext()) {
sampleIterator = sampleIterators.next();
currentAlignmentStart = sampleIterator!=null&&sampleIterator.hasNext() ? sampleIterator.next() : null;
alignmentStartIterator = currentAlignmentStart!=null ? currentAlignmentStart.iterator() : null;
nextReadState = alignmentStartIterator!=null&&alignmentStartIterator.hasNext() ? alignmentStartIterator.next() : null;
}
}
}; };
} }
public boolean isEmpty() { public boolean isEmpty() {
return readStatesBySample.isEmpty(); return totalReadStatesInHanger == 0;
} }
public int size() { public int size() {
int size = 0; int size = 0;
for(Deque<List<SAMRecordState>> readStatesByAlignmentStart: readStatesBySample.values()) { for(Deque<SAMRecordState> readStates: readStatesBySample.values()) {
for(Collection<SAMRecordState> readStates: readStatesByAlignmentStart) size += readStates.size();
size += readStates.size();
} }
return size; return size;
} }
public SAMRecordState getFirst() { public SAMRecordState getFirst() {
return iterator().next(); for(String sampleName: sampleNames) {
Deque<SAMRecordState> reads = readStatesBySample.get(sampleName);
if(!reads.isEmpty())
return reads.peek();
}
return null;
} }
public boolean hasNext() { public boolean hasNext() {
@ -652,38 +631,36 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
} }
public void collectPendingReads() { public void collectPendingReads() {
if(iterator.hasNext() && readStates.size() == 0) { if(!iterator.hasNext())
return;
if(readStates.size() == 0) {
int firstContigIndex = iterator.peek().getReferenceIndex(); int firstContigIndex = iterator.peek().getReferenceIndex();
int firstAlignmentStart = iterator.peek().getAlignmentStart(); int firstAlignmentStart = iterator.peek().getAlignmentStart();
while(iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) { while(iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) {
SAMRecord read = iterator.next(); firstReadSelector.submitRead(iterator.next());
Collection<SAMRecord> aggregator = getAggregator(read.getReadGroup()!=null ? read.getReadGroup().getSample() : null);
aggregator.add(read);
} }
} }
else { else {
// Fast fail in the case that the read is past the current position. // Fast fail in the case that the read is past the current position.
if(iterator.hasNext() && readIsPastCurrentPosition(iterator.peek())) if(readIsPastCurrentPosition(iterator.peek()))
return; return;
while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) { while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) {
SAMRecord read = iterator.next(); firstReadSelector.submitRead(iterator.next());
Collection<SAMRecord> aggregator = getAggregator(read.getReadGroup()!=null ? read.getReadGroup().getSample() : null);
aggregator.add(read);
} }
} }
firstReadSelector.complete();
int readStatesInHangerEntry = 0; int readStatesInHangerEntry = 0;
for(Map.Entry<String,Collection<SAMRecord>> entry: aggregatorsBySampleName.entrySet()) { for(String sampleName: sampleNames) {
String sampleName = entry.getKey(); ReadSelector aggregator = samplePartitioner.getSelectedReads(sampleName);
Collection<SAMRecord> aggregator = entry.getValue();
Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator); Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator.getSelectedReads());
aggregator.clear();
Deque<List<SAMRecordState>> hanger = readStatesBySample.get(sampleName); Deque<SAMRecordState> hanger = readStatesBySample.get(sampleName);
int readsInHanger = countReadsInHanger(sampleName); int readsInHanger = hanger.size();
if(readsInHanger+newReads.size()<=targetCoverage || downsamplingMethod.type==DownsampleType.NONE || downsamplingMethod.type==DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR) { if(readsInHanger+newReads.size()<=targetCoverage || downsamplingMethod.type==DownsampleType.NONE || downsamplingMethod.type==DownsampleType.EXPERIMENTAL_NAIVE_DUPLICATE_ELIMINATOR) {
int readLimit = newReads.size(); int readLimit = newReads.size();
@ -692,9 +669,11 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
readLimit = maxReadsAtLocus-totalReadStatesInHanger; readLimit = maxReadsAtLocus-totalReadStatesInHanger;
mrlViolation = true; mrlViolation = true;
} }
readStatesInHangerEntry += addReadsToHanger(hanger,newReads,readLimit,mrlViolation); readStatesInHangerEntry += addReadsToSample(hanger,newReads,readLimit,mrlViolation);
} }
else { else {
// TODO: implement downsampling mechanism
/*
Iterator<List<SAMRecordState>> backIterator = hanger.descendingIterator(); Iterator<List<SAMRecordState>> backIterator = hanger.descendingIterator();
boolean readPruned = true; boolean readPruned = true;
while(readsInHanger+newReads.size()>targetCoverage && readPruned) { while(readsInHanger+newReads.size()>targetCoverage && readPruned) {
@ -714,36 +693,23 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
readsInHanger -= readsInFirstHanger.size(); readsInHanger -= readsInFirstHanger.size();
} }
readStatesInHangerEntry += addReadsToHanger(hanger,newReads,targetCoverage-readsInHanger,false); readStatesInHangerEntry += addReadsToSample(hanger,newReads,targetCoverage-readsInHanger,false);
*/
} }
totalReadStatesInHanger += readStatesInHangerEntry; totalReadStatesInHanger += readStatesInHangerEntry;
} }
} firstReadSelector.reset();
private Collection<SAMRecord> getAggregator(String sampleName) {
if(downsamplingMethod.type == DownsampleType.EXPERIMENTAL_BY_SAMPLE)
return aggregatorsBySampleName.get(sampleName);
else
return aggregatorsBySampleName.get(null);
}
private int countReadsInHanger(final String sampleName) {
int count = 0;
for(List<SAMRecordState> hangerEntry: readStatesBySample.get(sampleName)) {
count += hangerEntry.size();
}
return count;
} }
/** /**
* Add reads with the given sample name to the given hanger entry. * Add reads with the given sample name to the given hanger entry.
* @param newHangerEntry The hanger entry to add. * @param readStates The list of read states to which this collection of reads will be added.
* @param reads Reads to add. Selected reads will be pulled from this source. * @param reads Reads to add. Selected reads will be pulled from this source.
* @param maxReads Maximum number of reads to add. * @param maxReads Maximum number of reads to add.
* @return Total number of reads added. * @return Total number of reads added.
*/ */
private int addReadsToHanger(final Deque<List<SAMRecordState>> newHangerEntry, final Collection<SAMRecord> reads, final int maxReads, boolean atMaxReadsAtLocusLimit) { private int addReadsToSample(final Deque<SAMRecordState> readStates, final Collection<SAMRecord> reads, final int maxReads, boolean atMaxReadsAtLocusLimit) {
if(reads.isEmpty()) if(reads.isEmpty())
return 0; return 0;
@ -752,7 +718,6 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
// the farthest right a read extends // the farthest right a read extends
Integer rightMostEnd = -1; Integer rightMostEnd = -1;
List<SAMRecordState> readStates = new LinkedList<SAMRecordState>();
int readCount = 0; int readCount = 0;
for(SAMRecord read: reads) { for(SAMRecord read: reads) {
if(readCount <= maxReads) { if(readCount <= maxReads) {
@ -769,7 +734,6 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
rightMostEnd = (read.getAlignmentEnd() > rightMostEnd) ? read.getAlignmentEnd() : rightMostEnd; rightMostEnd = (read.getAlignmentEnd() > rightMostEnd) ? read.getAlignmentEnd() : rightMostEnd;
} }
} }
newHangerEntry.add(readStates);
if (location != null) if (location != null)
overflowTracker.exceeded(GenomeLocParser.createGenomeLoc(location.getContigIndex(),location.getStart(),rightMostEnd), overflowTracker.exceeded(GenomeLocParser.createGenomeLoc(location.getContigIndex(),location.getStart(),rightMostEnd),
@ -780,3 +744,247 @@ public class DownsamplingLocusIteratorByState extends LocusIterator {
} }
} }
/**
 * A link in a chain of read filters.  Each selector examines the reads offered
 * to it, retains the subset matching its own criterion, and forwards its
 * accept/reject decisions to the next selector in the chain so that downstream
 * bookkeeping stays accurate.
 */
interface ReadSelector {
    /**
     * Offer a read that every upstream selector has accepted.
     * @param read read to consider for selection.
     */
    void submitRead(SAMRecord read);

    /**
     * Inform this selector that an upstream selector discarded the given read,
     * so that this selector's counters remain valid.
     * @param read the discarded read.
     */
    void notifyReadRejected(SAMRecord read);

    /**
     * Indicate that no further reads will be submitted for this batch.
     */
    void complete();

    /**
     * Report how many reads this selector has been told about so far.
     * @return total number of reads seen (accepted or rejected).
     */
    long getNumReadsSeen();

    /**
     * Report how many reads this selector has accepted so far.
     * @return number of reads selected.
     */
    long getNumReadsSelected();

    /**
     * Retrieve the reads currently accepted by this selector.
     * @return collection of selected reads.
     */
    Collection<SAMRecord> getSelectedReads();

    /**
     * Discard all accumulated state, returning the selector to its initial,
     * pre-gathering condition.
     */
    void reset();
}
/**
 * Selects the first N reads submitted to it; every subsequent read is
 * rejected.  Accepted reads are forwarded to the (optional) chained selector,
 * and rejections are propagated so downstream counters stay consistent.
 */
class FirstNReadSelector implements ReadSelector {
    private final ReadSelector chainedSelector;

    private final Collection<SAMRecord> selectedReads = new LinkedList<SAMRecord>();
    private final long readLimit;
    private long readsSeen = 0;

    /**
     * @param chainedSelector next selector in the chain; may be null.
     * @param readLimit maximum number of reads this selector will accept.
     */
    public FirstNReadSelector(ReadSelector chainedSelector, long readLimit) {
        this.chainedSelector = chainedSelector;
        this.readLimit = readLimit;
    }

    public void submitRead(SAMRecord read) {
        // BUG FIX: the original condition was (readsSeen > readLimit), which
        // rejected the first reads and accepted only those arriving AFTER the
        // limit -- the inverse of the documented "first N" behavior.  Accept
        // while we are still under the limit instead.
        if(readsSeen < readLimit) {
            selectedReads.add(read);
            if(chainedSelector != null)
                chainedSelector.submitRead(read);
        }
        else {
            if(chainedSelector != null)
                chainedSelector.notifyReadRejected(read);
        }
        readsSeen++;
    }

    public void notifyReadRejected(SAMRecord read) {
        // An upstream selector discarded this read; count it as seen and
        // propagate the rejection downstream.
        if(chainedSelector != null)
            chainedSelector.notifyReadRejected(read);
        readsSeen++;
    }

    public void complete() {
        // Nothing to flush locally; just signal the rest of the chain.
        if(chainedSelector != null)
            chainedSelector.complete();
    }

    public long getNumReadsSeen() {
        return readsSeen;
    }

    public long getNumReadsSelected() {
        return selectedReads.size();
    }

    public Collection<SAMRecord> getSelectedReads() {
        return selectedReads;
    }

    public void reset() {
        selectedReads.clear();
        readsSeen = 0;
        if(chainedSelector != null)
            chainedSelector.reset();
    }
}
/**
* Select N reads randomly from the input stream.
*/
/**
 * Select N reads randomly from the input stream using reservoir sampling.
 * Reads are buffered in the reservoir and only flushed to the chained selector
 * when complete() is called, since membership is not final until then.
 */
class NRandomReadSelector implements ReadSelector {
    private final ReservoirDownsampler<SAMRecord> reservoir;
    private final ReadSelector chainedSelector;
    private long readsSeen = 0;

    /**
     * @param chainedSelector next selector in the chain; may be null if this is the last link.
     * @param readLimit size of the reservoir, i.e. the number of reads to keep.
     */
    public NRandomReadSelector(ReadSelector chainedSelector, long readLimit) {
        this.reservoir = new ReservoirDownsampler<SAMRecord>((int)readLimit);
        this.chainedSelector = chainedSelector;
    }

    public void submitRead(SAMRecord read) {
        SAMRecord displaced = reservoir.add(read);
        // BUGFIX: notify rejection of the read the reservoir actually evicted, not the
        // incoming read.  When the reservoir is full, add() may displace an OLDER read;
        // the old code always reported 'read' as rejected, corrupting downstream counts.
        if(displaced != null && chainedSelector != null)
            chainedSelector.notifyReadRejected(displaced);
        readsSeen++;
    }

    public void notifyReadRejected(SAMRecord read) {
        readsSeen++;
    }

    public void complete() {
        // BUGFIX: guard the flush loop as well; the old code dereferenced
        // chainedSelector inside the loop before checking it for null.
        if(chainedSelector != null) {
            for(SAMRecord read: reservoir.getDownsampledContents())
                chainedSelector.submitRead(read);
            chainedSelector.complete();
        }
    }

    public long getNumReadsSeen() {
        return readsSeen;
    }

    public long getNumReadsSelected() {
        return reservoir.size();
    }

    public Collection<SAMRecord> getSelectedReads() {
        return reservoir.getDownsampledContents();
    }

    public void reset() {
        reservoir.clear();
        if(chainedSelector != null)
            chainedSelector.reset();
    }
}
/**
 * Partitions incoming reads into per-sample buckets, keyed on the sample name of each
 * read's read group.  Reads whose sample is not among the configured names are dropped.
 */
class SamplePartitioner implements ReadSelector {
    private final Map<String,SampleStorage> readsBySample;
    private long readsSeen = 0;

    /**
     * @param sampleNames the set of sample names to partition by; one bucket is created per name.
     */
    public SamplePartitioner(Collection<String> sampleNames) {
        readsBySample = new HashMap<String,SampleStorage>();
        for(String sampleName: sampleNames)
            readsBySample.put(sampleName,new SampleStorage());
    }

    /** Look up the bucket for this read's sample; null if the sample is not tracked. */
    private SampleStorage storageFor(SAMRecord read) {
        String sampleName = (read.getReadGroup() != null) ? read.getReadGroup().getSample() : null;
        // Values in readsBySample are never null, so get()==null means "sample not tracked".
        return readsBySample.get(sampleName);
    }

    public void submitRead(SAMRecord read) {
        SampleStorage storage = storageFor(read);
        if(storage != null)
            storage.submitRead(read);
        readsSeen++;
    }

    public void notifyReadRejected(SAMRecord read) {
        SampleStorage storage = storageFor(read);
        if(storage != null)
            storage.notifyReadRejected(read);
        readsSeen++;
    }

    public void complete() {
        // NO-OP: buckets require no finalization.
    }

    public long getNumReadsSeen() {
        return readsSeen;
    }

    public long getNumReadsSelected() {
        return readsSeen;
    }

    public Collection<SAMRecord> getSelectedReads() {
        throw new UnsupportedOperationException("Cannot directly get selected reads from a read partitioner.");
    }

    /**
     * Retrieve the per-sample bucket for the given sample.
     * @param sampleName name of the sample whose reads are wanted.
     * @return the selector holding that sample's reads.
     * @throws NoSuchElementException if the sample was not configured at construction.
     */
    public ReadSelector getSelectedReads(String sampleName) {
        SampleStorage storage = readsBySample.get(sampleName);
        if(storage == null)
            throw new NoSuchElementException("Sample name not found");
        return storage;
    }

    public void reset() {
        for(SampleStorage bucket: readsBySample.values())
            bucket.reset();
        readsSeen = 0;
    }

    /** Simple accumulating bucket: accepts every read submitted to it. */
    private class SampleStorage implements ReadSelector {
        private Collection<SAMRecord> reads = new LinkedList<SAMRecord>();
        private long readsSeen = 0;

        public void submitRead(SAMRecord read) {
            reads.add(read);
            readsSeen++;
        }

        public void notifyReadRejected(SAMRecord read) {
            readsSeen++;
        }

        public void complete() {
            // NO-OP: nothing to flush.
        }

        public long getNumReadsSeen() {
            return readsSeen;
        }

        public long getNumReadsSelected() {
            return readsSeen;
        }

        public Collection<SAMRecord> getSelectedReads() {
            return reads;
        }

        public void reset() {
            reads.clear();
            readsSeen = 0;
        }
    }
}

View File

@ -33,10 +33,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.*;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import java.util.*; import java.util.*;
@ -399,7 +396,7 @@ public class LocusIteratorByState extends LocusIterator {
GenomeLoc loc = GenomeLocParser.incPos(our1stState.getLocation(),-1); GenomeLoc loc = GenomeLocParser.incPos(our1stState.getLocation(),-1);
// System.out.println("Indel(s) at "+loc); // System.out.println("Indel(s) at "+loc);
// for ( ExtendedEventPileupElement pe : indelPile ) { if ( pe.isIndel() ) System.out.println(" "+pe.toString()); } // for ( ExtendedEventPileupElement pe : indelPile ) { if ( pe.isIndel() ) System.out.println(" "+pe.toString()); }
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileup(loc, indelPile, size, maxDeletionLength, nInsertions, nDeletions, nMQ0Reads)); nextAlignmentContext = new AlignmentContext(loc, new UnifiedReadBackedExtendedEventPileup(loc, indelPile, size, maxDeletionLength, nInsertions, nDeletions, nMQ0Reads));
} else { } else {
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size()); ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
@ -430,7 +427,7 @@ public class LocusIteratorByState extends LocusIterator {
GenomeLoc loc = getLocation(); GenomeLoc loc = getLocation();
updateReadStates(); // critical - must be called after we get the current state offsets and location updateReadStates(); // critical - must be called after we get the current state offsets and location
// if we got reads with non-D/N over the current position, we are done // if we got reads with non-D/N over the current position, we are done
if ( pile.size() != 0 ) nextAlignmentContext = new AlignmentContext(loc, new ReadBackedPileup(loc, pile, size, nDeletions, nMQ0Reads)); if ( pile.size() != 0 ) nextAlignmentContext = new AlignmentContext(loc, new UnifiedReadBackedPileup(loc, pile, size, nDeletions, nMQ0Reads));
} }
} }
} }

View File

@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
import java.util.*; import java.util.*;
@ -183,7 +184,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
if ( DEBUG ) logger.debug(String.format("*** TraverseDuplicates.traverse at %s with %d read sets", site, readSets.size())); if ( DEBUG ) logger.debug(String.format("*** TraverseDuplicates.traverse at %s with %d read sets", site, readSets.size()));
// Jump forward in the reference to this locus location // Jump forward in the reference to this locus location
AlignmentContext locus = new AlignmentContext(site, new ReadBackedPileup(site)); AlignmentContext locus = new AlignmentContext(site, new UnifiedReadBackedPileup(site));
// update the number of duplicate sets we've seen // update the number of duplicate sets we've seen
TraversalStatistics.nRecords++; TraversalStatistics.nRecords++;

View File

@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
/** /**
* A simple solution to iterating over all reference positions over a series of genomic locations. * A simple solution to iterating over all reference positions over a series of genomic locations.
@ -92,7 +93,7 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
long nSkipped = rodLocusView.getLastSkippedBases(); long nSkipped = rodLocusView.getLastSkippedBases();
if ( nSkipped > 0 ) { if ( nSkipped > 0 ) {
GenomeLoc site = rodLocusView.getLocOneBeyondShard(); GenomeLoc site = rodLocusView.getLocOneBeyondShard();
AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileup(site), nSkipped); AlignmentContext ac = new AlignmentContext(site, new UnifiedReadBackedPileup(site), nSkipped);
M x = walker.map(null, null, ac); M x = walker.map(null, null, ac);
sum = walker.reduce(x, sum); sum = walker.reduce(x, sum);
} }

View File

@ -37,6 +37,7 @@ import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ExtendedPileupElement; import org.broadinstitute.sting.utils.pileup.ExtendedPileupElement;
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
import java.util.*; import java.util.*;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
@ -265,7 +266,7 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
} }
} }
return new ReadBackedPileup(pileup.getLocation(), reads, offsets); return new UnifiedReadBackedPileup(pileup.getLocation(), reads, offsets);
} }
private static ReadBackedPileup getPileupOfAllele( Allele allele, ReadBackedPileup pileup ) { private static ReadBackedPileup getPileupOfAllele( Allele allele, ReadBackedPileup pileup ) {
@ -278,7 +279,7 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
} }
} }
return new ReadBackedPileup(pileup.getLocation(), filteredPileup); return new UnifiedReadBackedPileup(pileup.getLocation(), filteredPileup);
} }
public List<String> getKeyNames() { return Arrays.asList("HaplotypeScore"); } public List<String> getKeyNames() { return Arrays.asList("HaplotypeScore"); }

View File

@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.vcf.*; import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
import org.broad.tribble.vcf.VCFHeaderLine; import org.broad.tribble.vcf.VCFHeaderLine;
import java.util.*; import java.util.*;
@ -158,7 +159,7 @@ public class BatchedCallsMerger extends LocusWalker<VariantContext, Integer> imp
if ( readGroup != null && samples.contains(readGroup.getSample()) ) if ( readGroup != null && samples.contains(readGroup.getSample()) )
newPileup.add(p); newPileup.add(p);
} }
return new AlignmentContext(pileup.getLocation(), new ReadBackedPileup(pileup.getLocation(), newPileup)); return new AlignmentContext(pileup.getLocation(), new UnifiedReadBackedPileup(pileup.getLocation(), newPileup));
} }

View File

@ -45,10 +45,7 @@ import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter; import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter; import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter; import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.*;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.*; import java.util.*;
@ -241,7 +238,7 @@ public class UnifiedGenotyperEngine {
AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES ) AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES )
filteredPileup.add(p); filteredPileup.add(p);
} }
return new ReadBackedPileup(pileup.getLocation(), filteredPileup); return new UnifiedReadBackedPileup(pileup.getLocation(), filteredPileup);
} }
// filter based on maximum mismatches and bad mates // filter based on maximum mismatches and bad mates
@ -253,7 +250,7 @@ public class UnifiedGenotyperEngine {
AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES ) AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES )
filteredPileup.add(p); filteredPileup.add(p);
} }
return new ReadBackedExtendedEventPileup(pileup.getLocation(), filteredPileup); return new UnifiedReadBackedExtendedEventPileup(pileup.getLocation(), filteredPileup);
} }
private static boolean isValidDeletionFraction(double d) { private static boolean isValidDeletionFraction(double d) {

View File

@ -13,7 +13,7 @@ import java.util.*;
* @author mhanna * @author mhanna
* @version 0.1 * @version 0.1
*/ */
public class ReservoirDownsampler<T> implements Collection<T> { public class ReservoirDownsampler<T> {
/** /**
* Create a random number generator with a random, but reproducible, seed. * Create a random number generator with a random, but reproducible, seed.
*/ */
@ -40,31 +40,35 @@ public class ReservoirDownsampler<T> implements Collection<T> {
this.maxElements = maxElements; this.maxElements = maxElements;
} }
@Override /**
public boolean add(T element) { * Returns the eliminated element.
* @param element Eliminated element; null if no element has been eliminated.
* @return
*/
public T add(T element) {
if(maxElements <= 0) if(maxElements <= 0)
return false; return element;
else if(reservoir.size() < maxElements) { else if(reservoir.size() < maxElements) {
reservoir.add(element); reservoir.add(element);
return true; return null;
} }
else { else {
// Get a uniformly distributed int. If the chosen slot lives within the partition, replace the entry in that slot with the newest entry. // Get a uniformly distributed int. If the chosen slot lives within the partition, replace the entry in that slot with the newest entry.
int slot = random.nextInt(maxElements); int slot = random.nextInt(maxElements);
if(slot >= 0 && slot < maxElements) { if(slot >= 0 && slot < maxElements) {
T displaced = reservoir.get(slot);
reservoir.set(slot,element); reservoir.set(slot,element);
return true; return displaced;
} }
else else
return false; return element;
} }
} }
@Override
public boolean addAll(Collection<? extends T> elements) { public boolean addAll(Collection<? extends T> elements) {
boolean added = false; boolean added = false;
for(T element: elements) for(T element: elements)
added |= add(element); added |= (add(element) != null);
return added; return added;
} }
@ -73,62 +77,51 @@ public class ReservoirDownsampler<T> implements Collection<T> {
* @return The downsampled contents of this reservoir. * @return The downsampled contents of this reservoir.
*/ */
public Collection<T> getDownsampledContents() { public Collection<T> getDownsampledContents() {
return (Collection<T>)reservoir.clone(); return reservoir;
} }
@Override
public void clear() { public void clear() {
reservoir.clear(); reservoir.clear();
} }
@Override
public boolean isEmpty() { public boolean isEmpty() {
return reservoir.isEmpty(); return reservoir.isEmpty();
} }
@Override
public int size() { public int size() {
return reservoir.size(); return reservoir.size();
} }
@Override
public Iterator<T> iterator() { public Iterator<T> iterator() {
return reservoir.iterator(); return reservoir.iterator();
} }
@Override
public boolean contains(Object o) { public boolean contains(Object o) {
return reservoir.contains(o); return reservoir.contains(o);
} }
@Override
public boolean containsAll(Collection<?> elements) { public boolean containsAll(Collection<?> elements) {
return reservoir.containsAll(elements); return reservoir.containsAll(elements);
} }
@Override
public boolean retainAll(Collection<?> elements) { public boolean retainAll(Collection<?> elements) {
return reservoir.retainAll(elements); return reservoir.retainAll(elements);
} }
@Override
public boolean remove(Object o) { public boolean remove(Object o) {
return reservoir.remove(o); return reservoir.remove(o);
} }
@Override
public boolean removeAll(Collection<?> elements) { public boolean removeAll(Collection<?> elements) {
return reservoir.removeAll(elements); return reservoir.removeAll(elements);
} }
@Override
public Object[] toArray() { public Object[] toArray() {
Object[] contents = new Object[reservoir.size()]; Object[] contents = new Object[reservoir.size()];
reservoir.toArray(contents); reservoir.toArray(contents);
return contents; return contents;
} }
@Override
public <T> T[] toArray(T[] array) { public <T> T[] toArray(T[] array) {
return reservoir.toArray(array); return reservoir.toArray(array);
} }

View File

@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup;
import java.util.List; import java.util.List;
import java.util.Arrays; import java.util.Arrays;
@ -114,7 +115,7 @@ public class DupUtils {
private static Pair<Byte, Byte> combineBaseProbs(List<SAMRecord> duplicates, int readOffset, int maxQScore) { private static Pair<Byte, Byte> combineBaseProbs(List<SAMRecord> duplicates, int readOffset, int maxQScore) {
GenomeLoc loc = GenomeLocParser.createGenomeLoc(duplicates.get(0)); GenomeLoc loc = GenomeLocParser.createGenomeLoc(duplicates.get(0));
ReadBackedPileup pileup = new ReadBackedPileup(loc, duplicates, readOffset); ReadBackedPileup pileup = new UnifiedReadBackedPileup(loc, duplicates, readOffset);
final boolean debug = false; final boolean debug = false;

View File

@ -0,0 +1,83 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.pileup;
import net.sf.picard.util.PeekableIterator;
import java.util.PriorityQueue;
import java.util.Map;
import java.util.Comparator;
import java.util.Iterator;
/**
* Merges multiple pileups broken down by sample.
*
* @author mhanna
* @version 0.1
*/
/**
 * Merges multiple per-sample pileups into a single element stream, k-way-merge style:
 * a priority queue holds one peekable iterator per non-empty sample pileup, ordered by
 * the offset of each iterator's next element.
 */
class MergingPileupElementIterator implements Iterator<PileupElement> {
private final PriorityQueue<PeekableIterator<? extends PileupElement>> perSampleIterators;
/**
 * @param pileupsPerSample map from sample name to that sample's pileup; values may be
 *        either ReadBackedPileup or ReadBackedExtendedEventPileup instances.  Empty
 *        pileups and values of any other type are silently skipped.
 */
public MergingPileupElementIterator(Map<String,? extends Object> pileupsPerSample) {
perSampleIterators = new PriorityQueue<PeekableIterator<? extends PileupElement>>(pileupsPerSample.size(),new PileupElementIteratorComparator());
for(Object pileupForSample: pileupsPerSample.values()) {
if(pileupForSample instanceof ReadBackedPileup) {
ReadBackedPileup pileup = (ReadBackedPileup)pileupForSample;
if(pileup.size() != 0)
perSampleIterators.add(new PeekableIterator<PileupElement>(pileup.iterator()));
}
else if(pileupForSample instanceof ReadBackedExtendedEventPileup) {
ReadBackedExtendedEventPileup pileup = (ReadBackedExtendedEventPileup)pileupForSample;
if(pileup.size() != 0)
perSampleIterators.add(new PeekableIterator<ExtendedEventPileupElement>(pileup.iterator()));
}
}
}
public boolean hasNext() {
return !perSampleIterators.isEmpty();
}
// Pop the iterator whose head sorts first, take its element, and re-queue the iterator
// if it still has elements; exhausted iterators drop out of the queue.
public PileupElement next() {
PeekableIterator<? extends PileupElement> currentIterator = perSampleIterators.remove();
PileupElement current = currentIterator.next();
if(currentIterator.hasNext())
perSampleIterators.add(currentIterator);
return current;
}
public void remove() {
throw new UnsupportedOperationException("Cannot remove from a merging iterator.");
}
/**
 * Compares two peekable iterators consisting of pileup elements by the offset of their
 * next element.  NOTE(review): comparing rhs - lhs yields DESCENDING offset order from
 * the priority queue; confirm this ordering is intended rather than ascending (lhs - rhs).
 */
private class PileupElementIteratorComparator implements Comparator<PeekableIterator<? extends PileupElement>> {
public int compare(PeekableIterator<? extends PileupElement> lhs, PeekableIterator<? extends PileupElement> rhs) {
return rhs.peek().getOffset() - lhs.peek().getOffset();
}
}
}

View File

@ -1,426 +1,141 @@
/* /*
* Copyright (c) 2010 The Broad Institute * Copyright (c) 2010, The Broad Institute
* *
* Permission is hereby granted, free of charge, to any person * Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation * obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without * files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, * restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell * copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the * copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following * Software is furnished to do so, subject to the following
* conditions: * conditions:
* *
* The above copyright notice and this permission notice shall be * The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software. * included in all copies or substantial portions of the Software.
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR * OTHER DEALINGS IN THE SOFTWARE.
* THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
*/
package org.broadinstitute.sting.utils.pileup;
package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.collections.Pair; import java.util.Iterator;
import java.util.List;
import java.util.*;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMRecord;
/**
/** * A clean interface for working with extended event pileups.
* Created by IntelliJ IDEA. *
* User: asivache * @author mhanna
* Date: Dec 29, 2009 * @version 0.1
* Time: 12:25:55 PM */
* To change this template use File | Settings | File Templates. public interface ReadBackedExtendedEventPileup extends Iterable<ExtendedEventPileupElement> {
*/ /**
public class ReadBackedExtendedEventPileup implements Iterable<ExtendedEventPileupElement> { * Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
private GenomeLoc loc = null; * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
private ArrayList<ExtendedEventPileupElement> pileup = null; * of the pileup (just returns this) if there are no MQ0 reads in the pileup.
*
private int size = 0; // cached value of the size of the pileup * @return
private int maxDeletionLength = 0; // cached value of the length of the longest deletion observed at the site */
private int nDeletions = 0; // cached value of the number of deletions public ReadBackedExtendedEventPileup getPileupWithoutMappingQualityZeroReads();
private int nInsertions = 0;
private int nMQ0Reads = 0; // cached value of the number of MQ0 reads public ReadBackedExtendedEventPileup getMappingFilteredPileup( int minMapQ );
/** /**
* Create a new version of a read backed pileup at loc without any aligned reads * Returns a pileup randomly downsampled to the desiredCoverage.
* *
*/ * @param desiredCoverage
public ReadBackedExtendedEventPileup(GenomeLoc loc ) { * @return
this(loc, new ArrayList<ExtendedEventPileupElement>(0)); */
} public ReadBackedExtendedEventPileup getDownsampledPileup(int desiredCoverage);
/** /**
* Create a new version of a read backed pileup at loc, using the reads and their corresponding * Returns the number of deletion events in this pileup
* offsets. This lower level constructure assumes pileup is well-formed and merely keeps a *
* pointer to pileup. Don't go changing the data in pileup. * @return
* */
*/ public int getNumberOfDeletions();
public ReadBackedExtendedEventPileup(GenomeLoc loc, ArrayList<ExtendedEventPileupElement> pileup ) {
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedExtendedEventPileup"); /**
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedExtendedEventPileup"); * Returns the number of insertion events in this pileup
*
this.loc = loc; * @return
this.pileup = pileup; */
public int getNumberOfInsertions();
calculatedCachedData();
} /** Returns the length of the longest deletion observed at the site this
* pileup is associated with (NOTE: by convention, both insertions and deletions
/** * are associated with genomic location immediately before the actual event). If
* Optimization of above constructor where all of the cached data is provided * there are no deletions at the site, returns 0.
* @param loc * @return
* @param pileup */
*/ public int getMaxDeletionLength();
public ReadBackedExtendedEventPileup(GenomeLoc loc, ArrayList<ExtendedEventPileupElement> pileup, int size,
int maxDeletionLength, int nInsertions, int nDeletions, int nMQ0Reads ) { /**
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedExtendedEventPileup"); * Returns the number of mapping quality zero reads in this pileup.
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedExtendedEventPileup"); * @return
*/
this.loc = loc; public int getNumberOfMappingQualityZeroReads();
this.pileup = pileup;
this.size = size; /**
this.maxDeletionLength = maxDeletionLength; * @return the number of elements in this pileup
this.nDeletions = nDeletions; */
this.nInsertions = nInsertions; public int size();
this.nMQ0Reads = nMQ0Reads;
} /**
* @return the location of this pileup
*/
/** public GenomeLoc getLocation();
* Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront,
* so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling public String getShortPileupString();
* sizes, nDeletion, etc. over and over potentially.
*/ /**
private void calculatedCachedData() { * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
size = 0; * @return
nDeletions = 0; */
nInsertions = 0; public List<SAMRecord> getReads();
nMQ0Reads = 0;
/**
for ( ExtendedEventPileupElement p : this ) { * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
* @return
size++; */
if ( p.isDeletion() ) { public List<Integer> getOffsets();
nDeletions++;
maxDeletionLength = Math.max(maxDeletionLength, p.getEventLength()); /**
} else { * Returns an array of the events in this pileup ('I', 'D', or '.'). Note this call costs O(n) and allocates fresh array each time
if ( p.isInsertion() ) nInsertions++; * @return
} */
public byte[] getEvents();
if ( p.getRead().getMappingQuality() == 0 ) {
nMQ0Reads++; /** A shortcut for getEventStringsWithCounts(null);
} *
} * @return
} */
public List<Pair<String,Integer>> getEventStringsWithCounts();
// -------------------------------------------------------- /** Returns String representation of all distinct extended events (indels) at the site along with
// * observation counts (numbers of reads) for each distinct event. If refBases is null, a simple string representation for
// Special 'constructors' * deletions will be generated as "<length>D" (i.e. "5D"); if the reference bases are provided, the actual
// * deleted sequence will be used in the string representation (e.g. "-AAC").
// -------------------------------------------------------- * @param refBases reference bases, starting with the current locus (i.e. the one immediately before the indel), and
* extending far enough to accomodate the longest deletion (i.e. size of refBases must be at least 1+<length of longest deletion>)
* @return list of distinct events; first element of a pair is a string representation of the event, second element
/** * gives the number of reads, in which that event was observed
* Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this */
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy public List<Pair<String,Integer>> getEventStringsWithCounts(byte[] refBases);
* of the pileup (just returns this) if there are no MQ0 reads in the pileup.
* /**
* @return * Get an array of the mapping qualities
*/ * @return
public ReadBackedExtendedEventPileup getPileupWithoutMappingQualityZeroReads() { */
public byte[] getMappingQuals();
if ( getNumberOfMappingQualityZeroReads() > 0 ) { }
ArrayList<ExtendedEventPileupElement> filteredPileup = new ArrayList<ExtendedEventPileupElement>();
for ( ExtendedEventPileupElement p : pileup ) {
if ( p.getRead().getMappingQuality() > 0 ) {
filteredPileup.add(p);
}
}
return new ReadBackedExtendedEventPileup(loc, filteredPileup);
} else {
return this;
}
}
/**
 * Returns the subset of this pileup coming from reads whose mapping quality is at least minMapQ.
 * Always allocates a fresh pileup; the underlying elements are shared, not copied.
 *
 * @param minMapQ minimum mapping quality (inclusive) for an element to be retained
 * @return a newly allocated pileup containing only the retained elements
 */
public ReadBackedExtendedEventPileup getMappingFilteredPileup( int minMapQ ) {
    ArrayList<ExtendedEventPileupElement> retained = new ArrayList<ExtendedEventPileupElement>();
    for ( ExtendedEventPileupElement element : pileup ) {
        // Skip any element whose read maps too poorly.
        if ( element.getRead().getMappingQuality() < minMapQ )
            continue;
        retained.add(element);
    }
    return new ReadBackedExtendedEventPileup(loc, retained);
}
/**
 * Returns a pileup randomly downsampled to the desiredCoverage.  If this pileup already
 * contains desiredCoverage elements or fewer, this very pileup is returned unchanged.
 *
 * @param desiredCoverage maximum number of elements to retain
 * @return this pileup if small enough, otherwise a freshly allocated downsampled pileup
 */
public ReadBackedExtendedEventPileup getDownsampledPileup(int desiredCoverage) {
    if ( size() <= desiredCoverage )
        return this;

    // Randomly choose desiredCoverage distinct element indices; the TreeSet keeps them
    // sorted so the downsampled pileup preserves the original element order.
    Random generator = new Random();
    TreeSet<Integer> positions = new TreeSet<Integer>();
    for ( int i = 0; i < desiredCoverage; /* no update */ ) {
        // add() returns false for duplicates, so only newly selected indices are counted.
        if ( positions.add(generator.nextInt(pileup.size())) )
            i++;
    }

    // Idiom fix: iterate the set directly with a typed for-each instead of a raw
    // Iterator plus an explicit cast; also presize the result list.
    ArrayList<ExtendedEventPileupElement> downsampledPileup = new ArrayList<ExtendedEventPileupElement>(desiredCoverage);
    for ( int nextReadToKeep : positions )
        downsampledPileup.add(pileup.get(nextReadToKeep));

    return new ReadBackedExtendedEventPileup(getLocation(), downsampledPileup);
}
// --------------------------------------------------------
//
// iterators
//
// --------------------------------------------------------
/**
 * The best way to access the elements of this pileup when you only care about the events
 * themselves.
 *
 * for (ExtendedEventPileupElement p : this) { doSomething(p); }
 *
 * Provides efficient iteration of the data.
 *
 * @return an iterator over the elements of this pileup, in pileup order
 */
public Iterator<ExtendedEventPileupElement> iterator() {
    return pileup.iterator();
}
/**
 * Returns the number of deletion events in this pileup (value is cached up front, so this is O(1)).
 *
 * @return count of elements representing deletion events
 */
public int getNumberOfDeletions() {
    return nDeletions;
}
/**
 * Returns the number of insertion events in this pileup (value is cached up front, so this is O(1)).
 *
 * @return count of elements representing insertion events
 */
public int getNumberOfInsertions() {
    return nInsertions;
}
/** Returns the length of the longest deletion observed at the site this
 * pileup is associated with (NOTE: by convention, both insertions and deletions
 * are associated with the genomic location immediately before the actual event). If
 * there are no deletions at the site, returns 0.
 * @return the longest observed deletion length, or 0 when no deletions are present
 */
public int getMaxDeletionLength() {
    return maxDeletionLength;
}
/**
 * Returns the number of mapping quality zero reads in this pileup (cached up front, so this is O(1)).
 * @return count of elements whose read has mapping quality 0
 */
public int getNumberOfMappingQualityZeroReads() {
    return nMQ0Reads;
}
// public int getNumberOfDeletions() {
// int n = 0;
//
// for ( int i = 0; i < size(); i++ ) {
// if ( getOffsets().get(i) != -1 ) { n++; }
// }
//
// return n;
// }
/**
 * @return the number of elements in this pileup (cached up front, so this is O(1))
 */
public int size() {
    return size;
}
/**
 * @return the genomic location of this pileup; never null (checked at construction)
 */
public GenomeLoc getLocation() {
    return loc;
}
/**
 * Renders this pileup in the extended pileup format: genomic position (chromosome name and
 * coordinate), the literal reference "base" 'E' (for E(xtended)), then one 'I', 'D', or '.'
 * symbol per read representing insertion, deletion or no-event, respectively.
 *
 * @return single-line string representation of this extended event pileup
 */
public String getShortPileupString() {
    GenomeLoc position = getLocation();
    String events = new String(getEvents());
    return String.format("%s %s E %s", position.getContig(), position.getStart(), events);
}
// --------------------------------------------------------
//
// Convenience functions that may be slow
//
// --------------------------------------------------------
/**
 * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates a fresh
 * list on each invocation.
 * @return newly allocated list of the underlying SAMRecords, in pileup order
 */
public List<SAMRecord> getReads() {
    List<SAMRecord> result = new ArrayList<SAMRecord>(size());
    for ( ExtendedEventPileupElement element : this )
        result.add(element.getRead());
    return result;
}
/**
 * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates a fresh
 * list on each invocation.
 * @return newly allocated list of per-read offsets, in pileup order
 */
public List<Integer> getOffsets() {
    List<Integer> result = new ArrayList<Integer>(size());
    for ( ExtendedEventPileupElement element : this )
        result.add(element.getOffset());
    return result;
}
/**
 * Returns an array of the events in this pileup ('I', 'D', or '.'). Note this call costs O(n)
 * and allocates a fresh array each time.
 * @return one event character per pileup element, in pileup order
 * @throws StingException if an element carries an unrecognized event type
 */
public byte[] getEvents() {
    byte[] events = new byte[size()];
    int idx = 0;
    for ( ExtendedEventPileupElement element : this ) {
        final byte code;
        switch ( element.getType() ) {
            case INSERTION: code = 'I'; break;
            case DELETION:  code = 'D'; break;
            case NOEVENT:   code = '.'; break;
            default: throw new StingException("Unknown event type encountered: "+element.getType());
        }
        events[idx++] = code;
    }
    return events;
}
/** A shortcut for getEventStringsWithCounts(null): produces event strings with the plain
 * "<length>D" representation for deletions, since no reference bases are supplied.
 *
 * @return list of distinct event strings paired with their observation counts
 */
public List<Pair<String,Integer>> getEventStringsWithCounts() {
    return getEventStringsWithCounts(null);
}
/** Returns String representation of all distinct extended events (indels) at the site along with
 * observation counts (numbers of reads) for each distinct event. If refBases is null, a simple string representation for
 * deletions will be generated as "<length>D" (i.e. "5D"); if the reference bases are provided, the actual
 * deleted sequence will be used in the string representation (e.g. "-AAC").
 * @param refBases reference bases, starting with the current locus (i.e. the one immediately before the indel), and
 * extending far enough to accommodate the longest deletion (i.e. size of refBases must be at least 1+<length of longest deletion>)
 * @return list of distinct events; first element of a pair is a string representation of the event, second element
 * gives the number of reads in which that event was observed
 */
public List<Pair<String,Integer>> getEventStringsWithCounts(byte[] refBases) {
    Map<String, Integer> counts = new HashMap<String,Integer>();

    for ( ExtendedEventPileupElement element : this ) {
        final String indel;
        switch ( element.getType() ) {
            case INSERTION:
                indel = "+"+element.getEventBases();
                break;
            case DELETION:
                indel = getDeletionString(element.getEventLength(),refBases);
                break;
            case NOEVENT:
                continue; // no-event elements contribute no indel string
            default:
                throw new StingException("Unknown event type encountered: "+element.getType());
        }

        Integer seenSoFar = counts.get(indel);
        counts.put(indel, seenSoFar == null ? 1 : seenSoFar.intValue() + 1);
    }

    List<Pair<String,Integer>> eventList = new ArrayList<Pair<String,Integer>>(counts.size());
    for ( Map.Entry<String,Integer> entry : counts.entrySet() )
        eventList.add(new Pair<String,Integer>(entry.getKey(), entry.getValue()));
    return eventList;
}
/**
 * Builds a string representation of a deletion event. If refBases is null, the representation
 * is "<length>D" (e.g. "5D"); if the reference bases are provided, a verbose representation
 * (e.g. "-AAC") is generated. NOTE: refBases must start with the base prior to the actual
 * deletion (i.e. deleted base(s) are refBases[1], refBases[2], ...), and the length of the
 * passed array must be sufficient to accommodate the deletion length (i.e. refBases must hold
 * at least length+1 bytes).
 * @param length number of deleted bases
 * @param refBases reference bases starting at the locus preceding the deletion, or null
 * @return string representation of the deletion event
 */
private String getDeletionString(int length, byte[] refBases) {
    if ( refBases == null )
        return Integer.toString(length)+"D"; // without reference bases we can only report something like "5D"
    return "-"+new String(refBases,1,length).toUpperCase();
}
/**
 * Get an array of the mapping qualities, one per pileup element, in pileup order.
 * Note this call costs O(n) and allocates a fresh array each time.
 * @return newly allocated array of mapping qualities
 */
public byte[] getMappingQuals() {
    byte[] quals = new byte[size()];
    int idx = 0;
    for ( ExtendedEventPileupElement element : this )
        quals[idx++] = (byte)element.getRead().getMappingQuality();
    return quals;
}
//
// Private functions for printing pileups
//
// Renders the mapping qualities as a phred+33 character string (see quals2String for the
// clamping caveats).
private String getMappingQualsString() {
    return quals2String(getMappingQuals());
}
// Converts an array of quality scores into a printable phred+33 string.  Qualities are
// clamped to 63 so the emitted character stays in the printable range; this mirrors the
// historical behavior and is flagged below as a known compromise.
private static String quals2String( byte[] quals ) {
    StringBuilder builder = new StringBuilder(quals.length);
    for ( int qual : quals ) {
        int clamped = Math.min(qual, 63); // todo: fixme, this isn't a good idea
        builder.append((char) (33 + clamped)); // todo: warning, this is illegal for qual > 63
    }
    return builder.toString();
}
}

View File

@ -1,157 +1,42 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.pileup; package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.iterators.IterableIterator;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ExtendedPileupElement;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.gatk.iterators.IterableIterator;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.*;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import java.util.List;
/** /**
* Version two file implementing pileups of bases in reads at a locus. * A data retrieval interface for accessing parts of the pileup.
* *
* @author Mark DePristo * @author mhanna
* @version 0.1
*/ */
public class ReadBackedPileup implements Iterable<PileupElement> { public interface ReadBackedPileup extends Iterable<PileupElement> {
private GenomeLoc loc = null;
private ArrayList<PileupElement> pileup = null;
private int size = 0; // cached value of the size of the pileup
private int nDeletions = 0; // cached value of the number of deletions
private int nMQ0Reads = 0; // cached value of the number of MQ0 reads
/**
* Create a new version of a read backed pileup at loc, using the reads and their corresponding
* offsets. This pileup will contain a list, in order of the reads, of the piled bases at
* reads[i] for all i in offsets. Does not make a copy of the data, so it's not safe to
* go changing the reads.
*
* @param loc
* @param reads
* @param offsets
*/
public ReadBackedPileup(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets ) {
this(loc, readsOffsets2Pileup(reads, offsets));
}
public ReadBackedPileup(GenomeLoc loc, List<SAMRecord> reads, int offset ) {
this(loc, readsOffsets2Pileup(reads, offset));
}
/**
* Create a new version of a read backed pileup at loc without any aligned reads
*
*/
public ReadBackedPileup(GenomeLoc loc ) {
this(loc, new ArrayList<PileupElement>(0));
}
/**
* Create a new version of a read backed pileup at loc, using the reads and their corresponding
* offsets. This lower level constructure assumes pileup is well-formed and merely keeps a
* pointer to pileup. Don't go changing the data in pileup.
*
*/
public ReadBackedPileup(GenomeLoc loc, ArrayList<PileupElement> pileup ) {
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup2");
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup2");
this.loc = loc;
this.pileup = pileup;
calculatedCachedData();
}
/**
* Optimization of above constructor where all of the cached data is provided
* @param loc
* @param pileup
*/
public ReadBackedPileup(GenomeLoc loc, ArrayList<PileupElement> pileup, int size, int nDeletions, int nMQ0Reads ) {
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup2");
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup2");
this.loc = loc;
this.pileup = pileup;
this.size = size;
this.nDeletions = nDeletions;
this.nMQ0Reads = nMQ0Reads;
}
/**
* Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront,
* so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling
* sizes, nDeletion, etc. over and over potentially.
*/
private void calculatedCachedData() {
size = 0;
nDeletions = 0;
nMQ0Reads = 0;
for ( PileupElement p : this ) {
size++;
if ( p.isDeletion() ) {
nDeletions++;
}
if ( p.getRead().getMappingQuality() == 0 ) {
nMQ0Reads++;
}
}
}
/**
* Helper routine for converting reads and offset lists to a PileupElement list.
*
* @param reads
* @param offsets
* @return
*/
private static ArrayList<PileupElement> readsOffsets2Pileup(List<SAMRecord> reads, List<Integer> offsets ) {
if ( reads == null ) throw new StingException("Illegal null read list in ReadBackedPileup2");
if ( offsets == null ) throw new StingException("Illegal null offsets list in ReadBackedPileup2");
if ( reads.size() != offsets.size() ) throw new StingException("Reads and offset lists have different sizes!");
ArrayList<PileupElement> pileup = new ArrayList<PileupElement>(reads.size());
for ( int i = 0; i < reads.size(); i++ ) {
pileup.add( new PileupElement( reads.get(i), offsets.get(i) ) );
}
return pileup;
}
/**
* Helper routine for converting reads and a single offset to a PileupElement list.
*
* @param reads
* @param offset
* @return
*/
private static ArrayList<PileupElement> readsOffsets2Pileup(List<SAMRecord> reads, int offset ) {
if ( reads == null ) throw new StingException("Illegal null read list in ReadBackedPileup2");
if ( offset < 0 ) throw new StingException("Illegal offset < 0 ReadBackedPileup2");
ArrayList<PileupElement> pileup = new ArrayList<PileupElement>(reads.size());
for ( int i = 0; i < reads.size(); i++ ) {
pileup.add( new PileupElement( reads.get(i), offset ) );
}
return pileup;
}
// --------------------------------------------------------
//
// Special 'constructors'
//
// --------------------------------------------------------
/** /**
* Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this * Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
@ -159,20 +44,7 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
* *
* @return * @return
*/ */
public ReadBackedPileup getPileupWithoutDeletions() { public ReadBackedPileup getPileupWithoutDeletions();
if ( getNumberOfDeletions() > 0 ) {
ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
for ( PileupElement p : pileup ) {
if ( !p.isDeletion() ) {
filteredPileup.add(p);
}
}
return new ReadBackedPileup(loc, filteredPileup);
} else {
return this;
}
}
/** /**
* Returns a new ReadBackedPileup where only one read from an overlapping read * Returns a new ReadBackedPileup where only one read from an overlapping read
@ -182,32 +54,7 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
* *
* @return the newly filtered pileup * @return the newly filtered pileup
*/ */
public ReadBackedPileup getOverlappingFragementFilteredPileup() { public ReadBackedPileup getOverlappingFragmentFilteredPileup();
Map<String, PileupElement> filteredPileup = new HashMap<String, PileupElement>();
for ( PileupElement p : pileup ) {
String readName = p.getRead().getReadName();
// if we've never seen this read before, life is good
if (!filteredPileup.containsKey(readName)) {
filteredPileup.put(readName, p);
} else {
PileupElement existing = filteredPileup.get(readName);
// if the reads disagree at this position, throw them both out. Otherwise
// keep the element with the higher quality score
if (existing.getBase() != p.getBase()) {
filteredPileup.remove(readName);
} else {
if (existing.getQual() < p.getQual()) {
filteredPileup.put(readName, p);
}
}
}
}
return new ReadBackedPileup(loc, new ArrayList<PileupElement>(filteredPileup.values()));
}
/** /**
* Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this * Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
@ -216,20 +63,7 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
* *
* @return * @return
*/ */
public ReadBackedPileup getPileupWithoutMappingQualityZeroReads() { public ReadBackedPileup getPileupWithoutMappingQualityZeroReads();
if ( getNumberOfMappingQualityZeroReads() > 0 ) {
ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
for ( PileupElement p : pileup ) {
if ( p.getRead().getMappingQuality() > 0 ) {
filteredPileup.add(p);
}
}
return new ReadBackedPileup(loc, filteredPileup);
} else {
return this;
}
}
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from /** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
* reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup. * reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.
@ -237,35 +71,21 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
* @param minMapQ * @param minMapQ
* @return * @return
*/ */
public ReadBackedPileup getBaseAndMappingFilteredPileup( int minBaseQ, int minMapQ ) { public ReadBackedPileup getBaseAndMappingFilteredPileup( int minBaseQ, int minMapQ );
ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
for ( PileupElement p : pileup ) {
if ( p.getRead().getMappingQuality() >= minMapQ && (p.isDeletion() || p.getQual() >= minBaseQ) ) {
filteredPileup.add(p);
}
}
return new ReadBackedPileup(loc, filteredPileup);
}
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ. /** Returns subset of this pileup that contains only bases with quality >= minBaseQ.
* This method allocates and returns a new instance of ReadBackedPileup. * This method allocates and returns a new instance of ReadBackedPileup.
* @param minBaseQ * @param minBaseQ
* @return * @return
*/ */
public ReadBackedPileup getBaseFilteredPileup( int minBaseQ ) { public ReadBackedPileup getBaseFilteredPileup( int minBaseQ );
return getBaseAndMappingFilteredPileup(minBaseQ, -1);
}
/** Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ. /** Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ.
* This method allocates and returns a new instance of ReadBackedPileup. * This method allocates and returns a new instance of ReadBackedPileup.
* @param minMapQ * @param minMapQ
* @return * @return
*/ */
public ReadBackedPileup getMappingFilteredPileup( int minMapQ ) { public ReadBackedPileup getMappingFilteredPileup( int minMapQ );
return getBaseAndMappingFilteredPileup(-1, minMapQ);
}
/** /**
* Returns a pileup randomly downsampled to the desiredCoverage. * Returns a pileup randomly downsampled to the desiredCoverage.
@ -273,97 +93,36 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
* @param desiredCoverage * @param desiredCoverage
* @return * @return
*/ */
public ReadBackedPileup getDownsampledPileup(int desiredCoverage) { public ReadBackedPileup getDownsampledPileup(int desiredCoverage);
if ( size() <= desiredCoverage )
return this;
// randomly choose numbers corresponding to positions in the reads list
Random generator = new Random();
TreeSet<Integer> positions = new TreeSet<Integer>();
for ( int i = 0; i < desiredCoverage; /* no update */ ) {
if ( positions.add(generator.nextInt(pileup.size())) )
i++;
}
Iterator positionIter = positions.iterator();
ArrayList<PileupElement> downsampledPileup = new ArrayList<PileupElement>();
while ( positionIter.hasNext() ) {
int nextReadToKeep = (Integer)positionIter.next();
downsampledPileup.add(pileup.get(nextReadToKeep));
}
return new ReadBackedPileup(getLocation(), downsampledPileup);
}
// --------------------------------------------------------
//
// iterators
//
// --------------------------------------------------------
/** /**
* The best way to access PileupElements where you only care about the bases and quals in the pileup. * Gets the particular subset of this pileup with the given sample name.
* * @param sampleName Name of the sample to use.
* for (PileupElement p : this) { doSomething(p); } * @return A subset of this pileup containing only reads with the given sample.
*
* Provides efficient iteration of the data.
*
* @return
*/ */
public Iterator<PileupElement> iterator() { public ReadBackedPileup getPileupForSample(String sampleName);
return pileup.iterator();
} // todo -- delete or make private
public IterableIterator<ExtendedPileupElement> extendedForeachIterator();
/**
* The best way to access PileupElements where you only care not only about bases and quals in the pileup
* but also need access to the index of the pileup element in the pile.
*
* for (ExtendedPileupElement p : this) { doSomething(p); }
*
* Provides efficient iteration of the data.
*
* @return
*/
// todo -- reimplement efficiently
public IterableIterator<ExtendedPileupElement> extendedForeachIterator() {
ArrayList<ExtendedPileupElement> x = new ArrayList<ExtendedPileupElement>(size());
int i = 0;
for ( PileupElement pile : this ) {
x.add(new ExtendedPileupElement(pile.getRead(), pile.getOffset(), i++, this));
}
return new IterableIterator<ExtendedPileupElement>(x.iterator());
}
/** /**
* Simple useful routine to count the number of deletion bases in this pileup * Simple useful routine to count the number of deletion bases in this pileup
* *
* @return * @return
*/ */
public int getNumberOfDeletions() { public int getNumberOfDeletions();
return nDeletions;
}
public int getNumberOfMappingQualityZeroReads() { public int getNumberOfMappingQualityZeroReads();
return nMQ0Reads;
}
/** /**
* @return the number of elements in this pileup * @return the number of elements in this pileup
*/ */
public int size() { public int size();
return size;
}
/** /**
* @return the location of this pileup * @return the location of this pileup
*/ */
public GenomeLoc getLocation() { public GenomeLoc getLocation();
return loc;
}
/** /**
* Get counts of A, C, G, T in order, which returns a int[4] vector with counts according * Get counts of A, C, G, T in order, which returns a int[4] vector with counts according
@ -371,143 +130,49 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
* *
* @return * @return
*/ */
public int[] getBaseCounts() { public int[] getBaseCounts();
int[] counts = new int[4];
for ( PileupElement pile : this ) {
// skip deletion sites
if ( ! pile.isDeletion() ) {
int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase());
if (index != -1)
counts[index]++;
}
}
return counts;
}
/** /**
* Somewhat expensive routine that returns true if any base in the pileup has secondary bases annotated * Somewhat expensive routine that returns true if any base in the pileup has secondary bases annotated
* @return * @return
*/ */
public boolean hasSecondaryBases() { public boolean hasSecondaryBases();
for ( PileupElement pile : this ) {
// skip deletion sites
if ( ! pile.isDeletion() && BaseUtils.isRegularBase((char)pile.getSecondBase()) )
return true;
}
return false; public String getPileupString(char ref);
}
public String getPileupString(char ref) {
// In the pileup format, each line represents a genomic position, consisting of chromosome name,
// coordinate, reference base, read bases, read qualities and alignment mapping qualities.
return String.format("%s %s %c %s %s",
getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
ref, // reference base
new String(getBases()),
getQualsString());
}
// --------------------------------------------------------
//
// Convenience functions that may be slow
//
// --------------------------------------------------------
/** /**
* Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
* @return * @return
*/ */
public List<SAMRecord> getReads() { public List<SAMRecord> getReads();
List<SAMRecord> reads = new ArrayList<SAMRecord>(size());
for ( PileupElement pile : this ) { reads.add(pile.getRead()); }
return reads;
}
/** /**
* Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
* @return * @return
*/ */
public List<Integer> getOffsets() { public List<Integer> getOffsets();
List<Integer> offsets = new ArrayList<Integer>(size());
for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); }
return offsets;
}
/** /**
* Returns an array of the bases in this pileup. Note this call costs O(n) and allocates fresh array each time * Returns an array of the bases in this pileup. Note this call costs O(n) and allocates fresh array each time
* @return * @return
*/ */
public byte[] getBases() { public byte[] getBases();
byte[] v = new byte[size()];
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getBase(); }
return v;
}
/** /**
* Returns an array of the secondary bases in this pileup. Note this call costs O(n) and allocates fresh array each time * Returns an array of the secondary bases in this pileup. Note this call costs O(n) and allocates fresh array each time
* @return * @return
*/ */
public byte[] getSecondaryBases() { public byte[] getSecondaryBases();
byte[] v = new byte[size()];
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getSecondBase(); }
return v;
}
/** /**
* Returns an array of the quals in this pileup. Note this call costs O(n) and allocates fresh array each time * Returns an array of the quals in this pileup. Note this call costs O(n) and allocates fresh array each time
* @return * @return
*/ */
public byte[] getQuals() { public byte[] getQuals();
byte[] v = new byte[size()];
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getQual(); }
return v;
}
/** /**
* Get an array of the mapping qualities * Get an array of the mapping qualities
* @return * @return
*/ */
public byte[] getMappingQuals() { public byte[] getMappingQuals();
byte[] v = new byte[size()];
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = (byte)pile.getRead().getMappingQuality(); }
return v;
}
//
// Private functions for printing pileups
//
private String getMappingQualsString() {
return quals2String(getMappingQuals());
}
private static String quals2String( byte[] quals ) {
StringBuilder qualStr = new StringBuilder();
for ( int qual : quals ) {
qual = Math.min(qual, 63); // todo: fixme, this isn't a good idea
char qualChar = (char) (33 + qual); // todo: warning, this is illegal for qual > 63
qualStr.append(qualChar);
}
return qualStr.toString();
}
private String getQualsString() {
return quals2String(getQuals());
}
public String toString() {
StringBuilder s = new StringBuilder();
s.append(getLocation());
s.append(": ");
for ( PileupElement p : this ) {
s.append((char)p.getBase());
}
return s.toString();
}
} }

View File

@ -0,0 +1,348 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.*;
import net.sf.samtools.SAMRecord;
/**
* Extended event pileups, split out by sample.
*
* @author mhanna
* @version 0.1
*/
public class SampleSplitReadBackedExtendedEventPileup implements ReadBackedExtendedEventPileup {
private GenomeLoc loc = null;
private Map<String,ReadBackedExtendedEventPileup> pileupBySample = null;
private int size = 0; // cached value of the size of the pileup
private int maxDeletionLength = 0; // cached value of the length of the longest deletion observed at the site
private int nDeletions = 0; // cached value of the number of deletions
private int nInsertions = 0;
private int nMQ0Reads = 0; // cached value of the number of MQ0 reads
/**
 * Creates an empty per-sample extended event pileup at the given location.
 * @param loc location of this pileup; must not be null
 */
public SampleSplitReadBackedExtendedEventPileup(GenomeLoc loc) {
    this(loc,new HashMap<String,ReadBackedExtendedEventPileup>());
}
/**
 * Creates a per-sample extended event pileup from already-split data, aggregating the cached
 * statistics (size, deletion/insertion counts, MQ0 count, max deletion length) up front so
 * the corresponding getters are O(1).
 * @param loc Location of this pileup; must not be null.
 * @param pileup Pileup data, split by sample name; must not be null.
 */
public SampleSplitReadBackedExtendedEventPileup(GenomeLoc loc, Map<String,ReadBackedExtendedEventPileup> pileup) {
    if ( loc == null ) throw new StingException("Illegal null genomeloc in SampleSplitReadBackedExtendedEventPileup");
    if ( pileup == null ) throw new StingException("Illegal null pileup in SampleSplitReadBackedExtendedEventPileup");
    this.loc = loc;
    this.pileupBySample = pileup;
    // Renamed loop variable: it previously shadowed the field 'pileupBySample'.
    for(ReadBackedExtendedEventPileup samplePileup: pileup.values()) {
        this.size += samplePileup.size();
        this.nDeletions += samplePileup.getNumberOfDeletions();
        this.nMQ0Reads += samplePileup.getNumberOfMappingQualityZeroReads();
        // Bug fix: the insertion count and max deletion length caches were never
        // aggregated here, so their getters always reported 0.
        this.nInsertions += samplePileup.getNumberOfInsertions();
        this.maxDeletionLength = Math.max(this.maxDeletionLength, samplePileup.getMaxDeletionLength());
    }
}
/**
 * Adds (or replaces) the pileup for the given sample, keeping the cached aggregate
 * statistics in sync.
 * @param sampleName name of the sample whose reads are in the given pileup
 * @param pileup per-sample pileup to add; assumed non-null
 */
private void addPileup(final String sampleName, ReadBackedExtendedEventPileup pileup) {
    pileupBySample.put(sampleName,pileup);
    size += pileup.size();
    nDeletions += pileup.getNumberOfDeletions();
    nMQ0Reads += pileup.getNumberOfMappingQualityZeroReads();
    // Bug fix: also maintain the insertion count and max deletion length caches;
    // previously they were never updated and their getters always reported 0.
    nInsertions += pileup.getNumberOfInsertions();
    maxDeletionLength = Math.max(maxDeletionLength, pileup.getMaxDeletionLength());
}
/**
 * Returns a pileup with all mapping-quality-zero reads removed, preserving the by-sample
 * split. Returns this very pileup when it contains no MQ0 reads, avoiding an unnecessary
 * copy; the per-sample data is shared, not copied.
 *
 * @return filtered pileup, or this pileup if no MQ0 reads are present
 */
@Override
public ReadBackedExtendedEventPileup getPileupWithoutMappingQualityZeroReads() {
    // Guard clause: nothing to filter.
    if ( getNumberOfMappingQualityZeroReads() == 0 )
        return this;

    SampleSplitReadBackedExtendedEventPileup filtered = new SampleSplitReadBackedExtendedEventPileup(loc);
    for ( Map.Entry<String,ReadBackedExtendedEventPileup> entry : pileupBySample.entrySet() )
        filtered.addPileup(entry.getKey(), entry.getValue().getPileupWithoutMappingQualityZeroReads());
    return filtered;
}
/**
 * Returns a pileup restricted to reads with mapping quality >= minMapQ,
 * preserving the per-sample split.
 * @param minMapQ minimum mapping quality to retain
 * @return newly allocated filtered pileup
 */
@Override
public ReadBackedExtendedEventPileup getMappingFilteredPileup( int minMapQ ) {
    SampleSplitReadBackedExtendedEventPileup result = new SampleSplitReadBackedExtendedEventPileup(loc);
    for ( Map.Entry<String,ReadBackedExtendedEventPileup> sampleEntry : pileupBySample.entrySet() )
        result.addPileup(sampleEntry.getKey(), sampleEntry.getValue().getMappingFilteredPileup(minMapQ));
    return result;
}
/**
 * Returns a pileup randomly downsampled to the desiredCoverage.
 *
 * @param desiredCoverage maximum number of pileup elements to retain
 * @return downsampled pileup, or this when it is already small enough
 */
@Override
public ReadBackedExtendedEventPileup getDownsampledPileup(int desiredCoverage) {
    if ( size() <= desiredCoverage )
        return this;

    // randomly choose distinct element positions (global indices across all samples) to keep
    Random generator = new Random();
    TreeSet<Integer> positions = new TreeSet<Integer>();
    while ( positions.size() < desiredCoverage )
        positions.add(generator.nextInt(size));

    SampleSplitReadBackedExtendedEventPileup downsampledPileup = new SampleSplitReadBackedExtendedEventPileup(getLocation());
    int current = 0;
    for(Map.Entry<String,ReadBackedExtendedEventPileup> entry: this.pileupBySample.entrySet()) {
        String sampleName = entry.getKey();
        ReadBackedExtendedEventPileup pileup = entry.getValue();

        List<ExtendedEventPileupElement> filteredPileup = new ArrayList<ExtendedEventPileupElement>();
        for(ExtendedEventPileupElement p: pileup) {
            if(positions.contains(current))
                filteredPileup.add(p);
            // BUGFIX: the global index must advance once per ELEMENT; it previously advanced
            // once per sample, so the wrong elements were kept and desiredCoverage was not honored.
            current++;
        }

        if(!filteredPileup.isEmpty())
            downsampledPileup.addPileup(sampleName,new UnifiedReadBackedExtendedEventPileup(loc,filteredPileup));
    }

    return downsampledPileup;
}
// Merges the per-sample pileups into a single element stream, casting each
// element back to an ExtendedEventPileupElement.
@Override
public Iterator<ExtendedEventPileupElement> iterator() {
    return new ExtendedEventCastingIterator(new MergingPileupElementIterator(pileupBySample));
}
/**
 * Returns the number of deletion events in this pileup.
 *
 * @return cached deletion count, accumulated as per-sample pileups are added
 */
@Override
public int getNumberOfDeletions() {
    return nDeletions;
}
/**
 * Returns the number of insertion events in this pileup.
 *
 * @return cached insertion count
 */
@Override
public int getNumberOfInsertions() {
    return nInsertions;
}
/** Returns the length of the longest deletion observed at the site this
 * pileup is associated with (NOTE: by convention, both insertions and deletions
 * are associated with genomic location immediately before the actual event). If
 * there are no deletions at the site, returns 0.
 * @return maximum deletion length across all per-sample pileups
 */
@Override
public int getMaxDeletionLength() {
    int longest = 0;
    for ( ReadBackedExtendedEventPileup samplePileup : pileupBySample.values() ) {
        int sampleLongest = samplePileup.getMaxDeletionLength();
        if ( sampleLongest > longest )
            longest = sampleLongest;
    }
    return longest;
}
/**
 * Returns the number of mapping quality zero reads in this pileup.
 * @return cached MQ0 read count, accumulated as per-sample pileups are added
 */
@Override
public int getNumberOfMappingQualityZeroReads() {
    return nMQ0Reads;
}
/**
 * @return the number of elements in this pileup (cached; summed over all samples)
 */
@Override
public int size() {
    return size;
}
/**
 * @return the genomic location of this pileup
 */
@Override
public GenomeLoc getLocation() {
    return loc;
}
/**
 * Renders this pileup in the short extended-event pileup format: chromosome,
 * coordinate, the reference "base" (always 'E' for E(xtended)), then one
 * 'I'/'D'/'.' symbol per read for insertion, deletion, or no event.
 * @return single-line pileup representation
 */
@Override
public String getShortPileupString() {
    GenomeLoc location = getLocation();
    return String.format("%s %s E %s",
            location.getContig(),
            location.getStart(),
            new String(getEvents()));
}
// --------------------------------------------------------
//
// Convenience functions that may be slow
//
// --------------------------------------------------------
/**
 * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time.
 * @return freshly allocated list of the backing reads
 */
@Override
public List<SAMRecord> getReads() {
    List<SAMRecord> result = new ArrayList<SAMRecord>(size());
    for ( ExtendedEventPileupElement element : this )
        result.add(element.getRead());
    return result;
}
/**
 * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time.
 * @return freshly allocated list of per-read offsets
 */
@Override
public List<Integer> getOffsets() {
    List<Integer> result = new ArrayList<Integer>(size());
    for ( ExtendedEventPileupElement element : this )
        result.add(element.getOffset());
    return result;
}
/**
 * Returns an array of the events in this pileup ('I', 'D', or '.'). Note this call costs O(n)
 * and allocates a fresh array each time.
 * @return freshly allocated array with one event code per pileup element
 */
@Override
public byte[] getEvents() {
    byte[] eventCodes = new byte[size()];
    int index = 0;
    for ( ExtendedEventPileupElement element : this ) {
        switch ( element.getType() ) {
            case INSERTION: eventCodes[index] = 'I'; break;
            case DELETION:  eventCodes[index] = 'D'; break;
            case NOEVENT:   eventCodes[index] = '.'; break;
            default: throw new StingException("Unknown event type encountered: "+element.getType());
        }
        index++;
    }
    return eventCodes;
}
/** A shortcut for getEventStringsWithCounts(null); deletions are rendered in the
 * short "<length>D" form since no reference bases are supplied.
 *
 * @return list of distinct events with observation counts
 */
@Override
public List<Pair<String,Integer>> getEventStringsWithCounts() {
    return getEventStringsWithCounts(null);
}
/** Returns String representation of all distinct extended events (indels) at the site along with
 * observation counts (numbers of reads) for each distinct event. If refBases is null, a simple string representation for
 * deletions will be generated as "<length>D" (i.e. "5D"); if the reference bases are provided, the actual
 * deleted sequence will be used in the string representation (e.g. "-AAC").
 * @param refBases reference bases, starting with the current locus (i.e. the one immediately before the indel), and
 *    extending far enough to accomodate the longest deletion (i.e. size of refBases must be at least 1+<length of longest deletion>)
 * @return list of distinct events; first element of a pair is a string representation of the event, second element
 *    gives the number of reads, in which that event was observed
 */
@Override
public List<Pair<String,Integer>> getEventStringsWithCounts(byte[] refBases) {
    Map<String, Integer> countsByEvent = new HashMap<String,Integer>();

    for ( ExtendedEventPileupElement element : this ) {
        String indel = null;
        switch ( element.getType() ) {
            case INSERTION:
                indel = "+"+element.getEventBases();
                break;
            case DELETION:
                indel = UnifiedReadBackedExtendedEventPileup.getDeletionString(element.getEventLength(),refBases);
                break;
            case NOEVENT:
                continue;   // no-event elements contribute nothing
            default:
                throw new StingException("Unknown event type encountered: "+element.getType());
        }

        Integer previousCount = countsByEvent.get(indel);
        countsByEvent.put(indel, previousCount == null ? 1 : previousCount.intValue()+1);
    }

    List<Pair<String,Integer>> eventList = new ArrayList<Pair<String,Integer>>(countsByEvent.size());
    for ( Map.Entry<String,Integer> entry : countsByEvent.entrySet() )
        eventList.add(new Pair<String,Integer>(entry.getKey(), entry.getValue()));
    return eventList;
}
/**
 * Get an array of the mapping qualities, one per pileup element. O(n); allocates
 * a fresh array each call.
 * @return freshly allocated array of mapping qualities
 */
@Override
public byte[] getMappingQuals() {
    byte[] quals = new byte[size()];
    int index = 0;
    for ( ExtendedEventPileupElement element : this )
        quals[index++] = (byte)element.getRead().getMappingQuality();
    return quals;
}
/**
 * Adapts an Iterator<PileupElement> into an Iterator<ExtendedEventPileupElement>
 * by casting each element. Elements produced by the wrapped iterator are assumed
 * to actually be extended-event elements; a non-conforming element would surface
 * as a ClassCastException from next().
 */
private class ExtendedEventCastingIterator implements Iterator<ExtendedEventPileupElement> {
    private final Iterator<PileupElement> wrappedIterator;
    public ExtendedEventCastingIterator(Iterator<PileupElement> iterator) {
        this.wrappedIterator = iterator;
    }
    public boolean hasNext() { return wrappedIterator.hasNext(); }
    public ExtendedEventPileupElement next() { return (ExtendedEventPileupElement)wrappedIterator.next(); }
    public void remove() { wrappedIterator.remove(); }
}
}

View File

@ -0,0 +1,414 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.pileup;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.SAMRecord;
import java.util.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.gatk.iterators.IterableIterator;
/**
* A read-backed pileup that internally divides the dataset based
* on sample, for super efficient per-sample access.
*
* TODO: there are a few functions that are shared between UnifiedReadBackedPileup and SampleSplitReadBackedPileup.
* TODO: refactor these into a common class.
*
* @author mhanna
* @version 0.1
*/
public class SampleSplitReadBackedPileup implements ReadBackedPileup {
    private GenomeLoc loc = null;
    private Map<String,ReadBackedPileup> pileupBySample = null;

    private int size = 0;           // cached number of elements across all samples
    private int nDeletions = 0;     // cached number of deletion elements
    private int nMQ0Reads = 0;      // cached number of mapping-quality-zero reads

    /**
     * Create an empty sample-split pileup at the given location.
     * @param loc Location of this pileup.
     */
    public SampleSplitReadBackedPileup(GenomeLoc loc) {
        this(loc,new HashMap<String,ReadBackedPileup>());
    }

    /**
     * Optimization of above constructor where all of the cached data is provided.
     * @param loc Location of this pileup.
     * @param pileup Pileup data, split by sample name.
     * @throws StingException if either argument is null
     */
    public SampleSplitReadBackedPileup(GenomeLoc loc, Map<String,ReadBackedPileup> pileup) {
        // BUGFIX: exception messages previously named the wrong class (UnifiedReadBackedPileup).
        if ( loc == null ) throw new StingException("Illegal null genomeloc in SampleSplitReadBackedPileup");
        if ( pileup == null ) throw new StingException("Illegal null pileup in SampleSplitReadBackedPileup");

        this.loc = loc;
        this.pileupBySample = pileup;

        // Fold each per-sample pileup's counts into the cached aggregate totals.
        // (Loop variable renamed: it previously shadowed the pileupBySample field.)
        for(ReadBackedPileup samplePileup: pileup.values()) {
            this.size += samplePileup.size();
            this.nDeletions += samplePileup.getNumberOfDeletions();
            this.nMQ0Reads += samplePileup.getNumberOfMappingQualityZeroReads();
        }
    }

    /**
     * Adds a single sample's pileup, keeping the cached aggregate counts in sync.
     * @param sampleName name of the sample backing the given pileup
     * @param pileup the per-sample pileup to merge in
     */
    private void addPileup(final String sampleName, ReadBackedPileup pileup) {
        pileupBySample.put(sampleName,pileup);

        size += pileup.size();
        nDeletions += pileup.getNumberOfDeletions();
        nMQ0Reads += pileup.getNumberOfMappingQualityZeroReads();
    }

    /**
     * Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this
     * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
     * of the pileup (just returns this) if there are no deletions in the pileup.
     *
     * @return filtered pileup, or this when there are no deletions
     */
    @Override
    public ReadBackedPileup getPileupWithoutDeletions() {
        if ( getNumberOfDeletions() == 0 )
            return this;

        SampleSplitReadBackedPileup filteredPileup = new SampleSplitReadBackedPileup(loc);
        for ( Map.Entry<String,ReadBackedPileup> entry : pileupBySample.entrySet() )
            filteredPileup.addPileup(entry.getKey(), entry.getValue().getPileupWithoutDeletions());
        return filteredPileup;
    }

    /**
     * Returns a new ReadBackedPileup where only one read from an overlapping read
     * pair is retained. If the two reads in question disagree to their basecall,
     * neither read is retained. If they agree on the base, the read with the higher
     * quality observation is retained.
     *
     * @return the newly filtered pileup
     */
    @Override
    public ReadBackedPileup getOverlappingFragmentFilteredPileup() {
        SampleSplitReadBackedPileup filteredPileup = new SampleSplitReadBackedPileup(loc);
        for ( Map.Entry<String,ReadBackedPileup> entry : pileupBySample.entrySet() )
            filteredPileup.addPileup(entry.getKey(), entry.getValue().getOverlappingFragmentFilteredPileup());
        return filteredPileup;
    }

    /**
     * Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
     * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
     * of the pileup (just returns this) if there are no MQ0 reads in the pileup.
     *
     * @return filtered pileup, or this when there are no MQ0 reads
     */
    @Override   // BUGFIX: @Override annotation was missing, unlike every sibling filter method
    public ReadBackedPileup getPileupWithoutMappingQualityZeroReads() {
        // BUGFIX: honor the documented "no unnecessary copy" contract; previously a copy
        // was always allocated even when the pileup contained no MQ0 reads.
        if ( getNumberOfMappingQualityZeroReads() == 0 )
            return this;

        SampleSplitReadBackedPileup filteredPileup = new SampleSplitReadBackedPileup(loc);
        for ( Map.Entry<String,ReadBackedPileup> entry : pileupBySample.entrySet() )
            filteredPileup.addPileup(entry.getKey(), entry.getValue().getPileupWithoutMappingQualityZeroReads());
        return filteredPileup;
    }

    /**
     * Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
     * reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.
     * @param minBaseQ minimum base quality to retain
     * @param minMapQ minimum read mapping quality to retain
     * @return newly allocated filtered pileup
     */
    @Override
    public ReadBackedPileup getBaseAndMappingFilteredPileup( int minBaseQ, int minMapQ ) {
        SampleSplitReadBackedPileup filteredPileup = new SampleSplitReadBackedPileup(loc);
        for ( Map.Entry<String,ReadBackedPileup> entry : pileupBySample.entrySet() )
            filteredPileup.addPileup(entry.getKey(), entry.getValue().getBaseAndMappingFilteredPileup(minBaseQ,minMapQ));
        return filteredPileup;
    }

    /** Returns subset of this pileup that contains only bases with quality >= minBaseQ.
     * This method allocates and returns a new instance of ReadBackedPileup.
     * @param minBaseQ minimum base quality to retain
     * @return newly allocated filtered pileup
     */
    @Override
    public ReadBackedPileup getBaseFilteredPileup( int minBaseQ ) {
        return getBaseAndMappingFilteredPileup(minBaseQ, -1);
    }

    /** Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ.
     * This method allocates and returns a new instance of ReadBackedPileup.
     * @param minMapQ minimum read mapping quality to retain
     * @return newly allocated filtered pileup
     */
    @Override
    public ReadBackedPileup getMappingFilteredPileup( int minMapQ ) {
        return getBaseAndMappingFilteredPileup(-1, minMapQ);
    }

    /**
     * Returns a pileup randomly downsampled to the desiredCoverage.
     *
     * @param desiredCoverage maximum number of pileup elements to retain
     * @return downsampled pileup, or this when it is already small enough
     */
    @Override
    public ReadBackedPileup getDownsampledPileup(int desiredCoverage) {
        if ( size() <= desiredCoverage )
            return this;

        // randomly choose distinct element positions (global indices across all samples) to keep
        Random generator = new Random();
        TreeSet<Integer> positions = new TreeSet<Integer>();
        while ( positions.size() < desiredCoverage )
            positions.add(generator.nextInt(size));

        SampleSplitReadBackedPileup downsampledPileup = new SampleSplitReadBackedPileup(getLocation());
        int current = 0;
        for ( Map.Entry<String,ReadBackedPileup> entry : this.pileupBySample.entrySet() ) {
            List<PileupElement> filteredPileup = new ArrayList<PileupElement>();
            for ( PileupElement p : entry.getValue() ) {
                if ( positions.contains(current) )
                    filteredPileup.add(p);
                // BUGFIX: the global index must advance once per ELEMENT; it previously advanced
                // once per sample, so the wrong elements were kept and desiredCoverage was not honored.
                current++;
            }

            if ( !filteredPileup.isEmpty() )
                downsampledPileup.addPileup(entry.getKey(), new UnifiedReadBackedPileup(loc,filteredPileup));
        }

        return downsampledPileup;
    }

    /**
     * @param sampleName sample to look up
     * @return the pileup for the given sample, or null if the sample is not present
     */
    @Override
    public ReadBackedPileup getPileupForSample(String sampleName) {
        return pileupBySample.containsKey(sampleName) ? pileupBySample.get(sampleName) : null;
    }

    // --------------------------------------------------------
    //
    // iterators
    //
    // --------------------------------------------------------

    /**
     * The best way to access PileupElements where you only care about the bases and quals in the pileup.
     *
     * for (PileupElement p : this) { doSomething(p); }
     *
     * Provides efficient iteration of the data, merging the per-sample pileups.
     *
     * @return iterator over all elements across samples
     */
    @Override
    public Iterator<PileupElement> iterator() {
        return new MergingPileupElementIterator(pileupBySample);
    }

    // todo -- why is this public?
    @Override
    public IterableIterator<ExtendedPileupElement> extendedForeachIterator() {
        ArrayList<ExtendedPileupElement> elements = new ArrayList<ExtendedPileupElement>(size());

        int pileupOffset = 0;
        Iterator<PileupElement> merged = new MergingPileupElementIterator(pileupBySample);
        while ( merged.hasNext() ) {
            PileupElement element = merged.next();
            elements.add(new ExtendedPileupElement(element.getRead(), element.getOffset(), pileupOffset++, this));
        }

        return new IterableIterator<ExtendedPileupElement>(elements.iterator());
    }

    /**
     * Simple useful routine to count the number of deletion bases in this pileup.
     *
     * @return cached deletion count
     */
    @Override
    public int getNumberOfDeletions() {
        return nDeletions;
    }

    /**
     * @return cached number of mapping quality zero reads
     */
    @Override
    public int getNumberOfMappingQualityZeroReads() {
        return nMQ0Reads;
    }

    /**
     * @return the number of elements in this pileup (cached; summed over all samples)
     */
    @Override
    public int size() {
        return size;
    }

    /**
     * @return the genomic location of this pileup
     */
    @Override
    public GenomeLoc getLocation() {
        return loc;
    }

    /**
     * Get counts of A, C, G, T in order, which returns a int[4] vector with counts according
     * to BaseUtils.simpleBaseToBaseIndex for each base.
     *
     * @return int[4] of base counts summed over all samples
     */
    @Override
    public int[] getBaseCounts() {
        int[] counts = new int[4];

        for ( ReadBackedPileup samplePileup : pileupBySample.values() ) {
            int[] countsBySample = samplePileup.getBaseCounts();
            for ( int i = 0; i < counts.length; i++ )
                counts[i] += countsBySample[i];
        }

        return counts;
    }

    /**
     * Somewhat expensive routine that returns true if any base in the pileup has secondary bases annotated.
     * @return true if any per-sample pileup reports secondary bases
     */
    @Override
    public boolean hasSecondaryBases() {
        for ( ReadBackedPileup samplePileup : pileupBySample.values() ) {
            if ( samplePileup.hasSecondaryBases() )
                return true;    // short-circuit rather than scanning every remaining sample
        }
        return false;
    }

    @Override
    public String getPileupString(char ref) {
        // In the pileup format, each line represents a genomic position, consisting of chromosome name,
        // coordinate, reference base, read bases, read qualities and alignment mapping qualities.
        return String.format("%s %s %c %s %s",
                getLocation().getContig(), getLocation().getStart(),    // chromosome name and coordinate
                ref,                                                    // reference base
                new String(getBases()),
                getQualsString());
    }

    // --------------------------------------------------------
    //
    // Convenience functions that may be slow
    //
    // --------------------------------------------------------

    /**
     * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time.
     * @return freshly allocated list of the backing reads
     */
    @Override
    public List<SAMRecord> getReads() {
        List<SAMRecord> reads = new ArrayList<SAMRecord>(size());
        for ( PileupElement pile : this ) { reads.add(pile.getRead()); }
        return reads;
    }

    /**
     * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time.
     * @return freshly allocated list of per-read offsets
     */
    @Override
    public List<Integer> getOffsets() {
        List<Integer> offsets = new ArrayList<Integer>(size());
        for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); }
        return offsets;
    }

    /**
     * Returns an array of the bases in this pileup. Note this call costs O(n) and allocates fresh array each time.
     * @return freshly allocated array of bases, indexed by pileup offset
     */
    @Override
    public byte[] getBases() {
        byte[] v = new byte[size()];
        for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getBase(); }
        return v;
    }

    /**
     * Returns an array of the secondary bases in this pileup. Note this call costs O(n) and allocates fresh array each time.
     * @return freshly allocated array of secondary bases, indexed by pileup offset
     */
    @Override
    public byte[] getSecondaryBases() {
        byte[] v = new byte[size()];
        for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getSecondBase(); }
        return v;
    }

    /**
     * Returns an array of the quals in this pileup. Note this call costs O(n) and allocates fresh array each time.
     * @return freshly allocated array of base qualities, indexed by pileup offset
     */
    @Override
    public byte[] getQuals() {
        byte[] v = new byte[size()];
        for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getQual(); }
        return v;
    }

    /**
     * Get an array of the mapping qualities.
     * @return freshly allocated array of mapping qualities, indexed by pileup offset
     */
    @Override
    public byte[] getMappingQuals() {
        byte[] v = new byte[size()];
        for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = (byte)pile.getRead().getMappingQuality(); }
        return v;
    }

    // Renders base qualities using the shared encoding helper.
    private String getQualsString() {
        return UnifiedReadBackedPileup.quals2String(getQuals());
    }
}

View File

@ -0,0 +1,420 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.*;
import net.sf.samtools.SAMRecord;
/**
 * A flat (non-sample-split) read-backed pileup of extended (indel) events at a
 * single locus. Aggregate statistics — element count, number of insertions and
 * deletions, longest deletion, and MQ0 read count — are computed once up front
 * and cached.
 *
 * @author asivache
 */
public class UnifiedReadBackedExtendedEventPileup implements ReadBackedExtendedEventPileup {
private GenomeLoc loc = null;
private List<ExtendedEventPileupElement> pileup = null;
private int size = 0; // cached value of the size of the pileup
private int maxDeletionLength = 0; // cached value of the length of the longest deletion observed at the site
private int nDeletions = 0; // cached value of the number of deletions
private int nInsertions = 0;
private int nMQ0Reads = 0; // cached value of the number of MQ0 reads
/**
* Create a new version of a read backed pileup at loc without any aligned reads
*
*/
public UnifiedReadBackedExtendedEventPileup(GenomeLoc loc ) {
this(loc, new ArrayList<ExtendedEventPileupElement>(0));
}
/**
* Create a new version of a read backed pileup at loc, using the reads and their corresponding
* offsets. This lower level constructure assumes pileup is well-formed and merely keeps a
* pointer to pileup. Don't go changing the data in pileup.
*
*/
public UnifiedReadBackedExtendedEventPileup(GenomeLoc loc, List<ExtendedEventPileupElement> pileup ) {
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedExtendedEventPileup");
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedExtendedEventPileup");
this.loc = loc;
this.pileup = pileup;
calculatedCachedData();
}
/**
* Optimization of above constructor where all of the cached data is provided
* @param loc
* @param pileup
*/
public UnifiedReadBackedExtendedEventPileup(GenomeLoc loc, List<ExtendedEventPileupElement> pileup, int size,
int maxDeletionLength, int nInsertions, int nDeletions, int nMQ0Reads ) {
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedExtendedEventPileup");
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedExtendedEventPileup");
this.loc = loc;
this.pileup = pileup;
this.size = size;
this.maxDeletionLength = maxDeletionLength;
this.nDeletions = nDeletions;
this.nInsertions = nInsertions;
this.nMQ0Reads = nMQ0Reads;
}
/**
* Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront,
* so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling
* sizes, nDeletion, etc. over and over potentially.
*/
private void calculatedCachedData() {
size = 0;
nDeletions = 0;
nInsertions = 0;
nMQ0Reads = 0;
for ( ExtendedEventPileupElement p : this ) {
size++;
if ( p.isDeletion() ) {
nDeletions++;
maxDeletionLength = Math.max(maxDeletionLength, p.getEventLength());
} else {
if ( p.isInsertion() ) nInsertions++;
}
if ( p.getRead().getMappingQuality() == 0 ) {
nMQ0Reads++;
}
}
}
// --------------------------------------------------------
//
// Special 'constructors'
//
// --------------------------------------------------------
/**
* Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
* of the pileup (just returns this) if there are no MQ0 reads in the pileup.
*
* @return
*/
@Override
public ReadBackedExtendedEventPileup getPileupWithoutMappingQualityZeroReads() {
if ( getNumberOfMappingQualityZeroReads() > 0 ) {
ArrayList<ExtendedEventPileupElement> filteredPileup = new ArrayList<ExtendedEventPileupElement>();
for ( ExtendedEventPileupElement p : pileup ) {
if ( p.getRead().getMappingQuality() > 0 ) {
filteredPileup.add(p);
}
}
return new UnifiedReadBackedExtendedEventPileup(loc, filteredPileup);
} else {
return this;
}
}
@Override
public ReadBackedExtendedEventPileup getMappingFilteredPileup( int minMapQ ) {
ArrayList<ExtendedEventPileupElement> filteredPileup = new ArrayList<ExtendedEventPileupElement>();
for ( ExtendedEventPileupElement p : pileup ) {
if ( p.getRead().getMappingQuality() >= minMapQ ) {
filteredPileup.add(p);
}
}
return new UnifiedReadBackedExtendedEventPileup(loc, filteredPileup);
}
/**
* Returns a pileup randomly downsampled to the desiredCoverage.
*
* @param desiredCoverage
* @return
*/
@Override
public ReadBackedExtendedEventPileup getDownsampledPileup(int desiredCoverage) {
if ( size() <= desiredCoverage )
return this;
// randomly choose numbers corresponding to positions in the reads list
Random generator = new Random();
TreeSet<Integer> positions = new TreeSet<Integer>();
for ( int i = 0; i < desiredCoverage; /* no update */ ) {
if ( positions.add(generator.nextInt(pileup.size())) )
i++;
}
Iterator positionIter = positions.iterator();
ArrayList<ExtendedEventPileupElement> downsampledPileup = new ArrayList<ExtendedEventPileupElement>();
while ( positionIter.hasNext() ) {
int nextReadToKeep = (Integer)positionIter.next();
downsampledPileup.add(pileup.get(nextReadToKeep));
}
return new UnifiedReadBackedExtendedEventPileup(getLocation(), downsampledPileup);
}
// --------------------------------------------------------
//
// iterators
//
// --------------------------------------------------------
/**
* The best way to access PileupElements where you only care about the bases and quals in the pileup.
*
* for (PileupElement p : this) { doSomething(p); }
*
* Provides efficient iteration of the data.
*
* @return
*/
@Override
public Iterator<ExtendedEventPileupElement> iterator() {
return pileup.iterator();
}
/**
* Returns the number of deletion events in this pileup
*
* @return
*/
@Override
public int getNumberOfDeletions() {
return nDeletions;
}
/**
* Returns the number of insertion events in this pileup
*
* @return
*/
@Override
public int getNumberOfInsertions() {
return nInsertions;
}
/** Returns the length of the longest deletion observed at the site this
* pileup is associated with (NOTE: by convention, both insertions and deletions
* are associated with genomic location immediately before the actual event). If
* there are no deletions at the site, returns 0.
* @return
*/
@Override
public int getMaxDeletionLength() {
return maxDeletionLength;
}
/**
* Returns the number of mapping quality zero reads in this pileup.
* @return
*/
@Override
public int getNumberOfMappingQualityZeroReads() {
return nMQ0Reads;
}
// public int getNumberOfDeletions() {
// int n = 0;
//
// for ( int i = 0; i < size(); i++ ) {
// if ( getOffsets().get(i) != -1 ) { n++; }
// }
//
// return n;
// }
/**
* @return the number of elements in this pileup
*/
@Override
public int size() {
return size;
}
/**
* @return the location of this pileup
*/
@Override
public GenomeLoc getLocation() {
return loc;
}
@Override
public String getShortPileupString() {
// In the pileup format, each extended event line has genomic position (chromosome name and offset),
// reference "base" (always set to "E" for E(xtended)), then 'I','D', or '.' symbol for each read representing
// insertion, deletion or no-event, respectively.
return String.format("%s %s E %s",
getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
new String(getEvents()) );
}
// --------------------------------------------------------
//
// Convenience functions that may be slow
//
// --------------------------------------------------------
/**
* Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
* @return
*/
@Override
public List<SAMRecord> getReads() {
List<SAMRecord> reads = new ArrayList<SAMRecord>(size());
for ( ExtendedEventPileupElement pile : this ) { reads.add(pile.getRead()); }
return reads;
}
/**
* Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
* @return
*/
@Override
public List<Integer> getOffsets() {
List<Integer> offsets = new ArrayList<Integer>(size());
for ( ExtendedEventPileupElement pile : this ) { offsets.add(pile.getOffset()); }
return offsets;
}
/**
* Returns an array of the events in this pileup ('I', 'D', or '.'). Note this call costs O(n) and allocates fresh array each time
* @return
*/
@Override
public byte[] getEvents() {
byte[] v = new byte[size()];
int i = 0;
for ( ExtendedEventPileupElement e : this ) {
switch ( e.getType() ) {
case INSERTION: v[i] = 'I'; break;
case DELETION: v[i] = 'D'; break;
case NOEVENT: v[i] = '.'; break;
default: throw new StingException("Unknown event type encountered: "+e.getType());
}
i++;
}
return v;
}
/** A shortcut for getEventStringsWithCounts(null);
*
* @return
*/
@Override
public List<Pair<String,Integer>> getEventStringsWithCounts() {
return getEventStringsWithCounts(null);
}
/** Returns String representation of all distinct extended events (indels) at the site along with
* observation counts (numbers of reads) for each distinct event. If refBases is null, a simple string representation for
* deletions will be generated as "<length>D" (i.e. "5D"); if the reference bases are provided, the actual
* deleted sequence will be used in the string representation (e.g. "-AAC").
* @param refBases reference bases, starting with the current locus (i.e. the one immediately before the indel), and
* extending far enough to accomodate the longest deletion (i.e. size of refBases must be at least 1+<length of longest deletion>)
* @return list of distinct events; first element of a pair is a string representation of the event, second element
* gives the number of reads, in which that event was observed
*/
@Override
public List<Pair<String,Integer>> getEventStringsWithCounts(byte[] refBases) {
Map<String, Integer> events = new HashMap<String,Integer>();
for ( ExtendedEventPileupElement e : this ) {
Integer cnt;
String indel = null;
switch ( e.getType() ) {
case INSERTION:
indel = "+"+e.getEventBases();
break;
case DELETION:
indel = getDeletionString(e.getEventLength(),refBases);
break;
case NOEVENT: continue;
default: throw new StingException("Unknown event type encountered: "+e.getType());
}
cnt = events.get(indel);
if ( cnt == null ) events.put(indel,1);
else events.put(indel,cnt.intValue()+1);
}
List<Pair<String,Integer>> eventList = new ArrayList<Pair<String,Integer>>(events.size());
for ( Map.Entry<String,Integer> m : events.entrySet() ) {
eventList.add( new Pair<String,Integer>(m.getKey(),m.getValue()));
}
return eventList;
}
/**
 * Builds the string representation of a deletion event. With no reference bases
 * the terse form "&lt;length&gt;D" (e.g. "5D") is produced; otherwise the verbose form
 * "-" plus the deleted sequence (e.g. "-AAC") is produced. NOTE: refBases must start
 * with the base immediately prior to the deletion (the deleted bases are refBases[1],
 * refBases[2], ...), so refBases must contain at least length+1 bases.
 * @param length number of deleted bases
 * @param refBases reference sequence starting at the locus before the deletion, or null
 * @return string representation of the deletion event
 */
static String getDeletionString(int length, byte[] refBases) {
    if ( refBases != null ) {
        // Verbose form: '-' followed by the actual deleted bases, refBases[1..length].
        return "-" + new String(refBases, 1, length).toUpperCase();
    }
    // No reference available; report only the deletion length, e.g. "5D".
    return length + "D";
}
/**
 * Collects the mapping quality of every read in this pileup.
 * @return array of mapping qualities, one per pileup element, in iteration order
 */
@Override
public byte[] getMappingQuals() {
    byte[] quals = new byte[size()];
    int idx = 0;
    for ( ExtendedEventPileupElement element : this )
        quals[idx++] = (byte)element.getRead().getMappingQuality();
    return quals;
}
}

View File

@ -0,0 +1,550 @@
package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.gatk.iterators.IterableIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.BaseUtils;
import java.util.*;
import net.sf.samtools.SAMRecord;
/**
 * Version two file implementing pileups of bases in reads at a locus.
 *
 * Maintains a flat list of PileupElements plus cached aggregate statistics
 * (size, deletion count, MQ0 count) computed once at construction time.
 *
 * @author Mark DePristo
 */
public class UnifiedReadBackedPileup implements ReadBackedPileup {
    private GenomeLoc loc = null;
    private List<PileupElement> pileup = null;

    private int size = 0;                   // cached value of the size of the pileup
    private int nDeletions = 0;             // cached value of the number of deletions
    private int nMQ0Reads = 0;              // cached value of the number of MQ0 reads

    /**
     * Create a new version of a read backed pileup at loc, using the reads and their corresponding
     * offsets. This pileup will contain a list, in order of the reads, of the piled bases at
     * reads[i] for all i in offsets. Does not make a copy of the data, so it's not safe to
     * go changing the reads.
     *
     * @param loc the locus of this pileup
     * @param reads the reads covering loc
     * @param offsets per-read offset of loc within each read; must parallel reads
     */
    public UnifiedReadBackedPileup(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets) {
        this(loc, readsOffsets2Pileup(reads, offsets));
    }

    /**
     * Create a new version of a read backed pileup at loc where every read shares a single offset.
     *
     * @param loc the locus of this pileup
     * @param reads the reads covering loc
     * @param offset the common offset of loc within every read
     */
    public UnifiedReadBackedPileup(GenomeLoc loc, List<SAMRecord> reads, int offset) {
        this(loc, readsOffsets2Pileup(reads, offset));
    }

    /**
     * Create a new version of a read backed pileup at loc without any aligned reads.
     *
     * @param loc the locus of this pileup
     */
    public UnifiedReadBackedPileup(GenomeLoc loc) {
        this(loc, new ArrayList<PileupElement>(0));
    }

    /**
     * Create a new version of a read backed pileup from an already-built element list.
     * This lower level constructor assumes pileup is well-formed and merely keeps a
     * pointer to pileup. Don't go changing the data in pileup.
     *
     * @param loc the locus of this pileup
     * @param pileup the pileup elements; not copied
     */
    public UnifiedReadBackedPileup(GenomeLoc loc, List<PileupElement> pileup) {
        if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup");
        if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup");

        this.loc = loc;
        this.pileup = pileup;

        calculateCachedData();
    }

    /**
     * Optimization of above constructor where all of the cached data is provided
     * @param loc the locus of this pileup
     * @param pileup the pileup elements; not copied
     * @param size precomputed number of elements
     * @param nDeletions precomputed number of deletion-spanning elements
     * @param nMQ0Reads precomputed number of mapping-quality-zero reads
     */
    public UnifiedReadBackedPileup(GenomeLoc loc, List<PileupElement> pileup, int size, int nDeletions, int nMQ0Reads) {
        if ( loc == null ) throw new StingException("Illegal null genomeloc in UnifiedReadBackedPileup");
        if ( pileup == null ) throw new StingException("Illegal null pileup in UnifiedReadBackedPileup");

        this.loc = loc;
        this.pileup = pileup;
        this.size = size;
        this.nDeletions = nDeletions;
        this.nMQ0Reads = nMQ0Reads;
    }

    /**
     * Calculate cached size, nDeletions, and nMQ0Reads for the pileup. This calculation is done
     * upfront, so you pay the cost at the start, but it's more efficient than recomputing these
     * values on every call to size(), getNumberOfDeletions(), etc.
     */
    private void calculateCachedData() {
        size = 0;
        nDeletions = 0;
        nMQ0Reads = 0;

        for ( PileupElement p : this ) {
            size++;
            if ( p.isDeletion() ) {
                nDeletions++;
            }
            if ( p.getRead().getMappingQuality() == 0 ) {
                nMQ0Reads++;
            }
        }
    }

    /**
     * Helper routine for converting parallel reads and offsets lists to a PileupElement list.
     *
     * @param reads the reads covering the locus
     * @param offsets per-read offsets, parallel to reads
     * @return a fresh list of PileupElements, one per read
     */
    private static ArrayList<PileupElement> readsOffsets2Pileup(List<SAMRecord> reads, List<Integer> offsets) {
        if ( reads == null ) throw new StingException("Illegal null read list in UnifiedReadBackedPileup");
        if ( offsets == null ) throw new StingException("Illegal null offsets list in UnifiedReadBackedPileup");
        if ( reads.size() != offsets.size() ) throw new StingException("Reads and offset lists have different sizes!");

        ArrayList<PileupElement> pileup = new ArrayList<PileupElement>(reads.size());
        for ( int i = 0; i < reads.size(); i++ ) {
            pileup.add(new PileupElement(reads.get(i), offsets.get(i)));
        }

        return pileup;
    }

    /**
     * Helper routine for converting reads and a single shared offset to a PileupElement list.
     *
     * @param reads the reads covering the locus
     * @param offset the common offset for every read
     * @return a fresh list of PileupElements, one per read
     */
    private static ArrayList<PileupElement> readsOffsets2Pileup(List<SAMRecord> reads, int offset) {
        if ( reads == null ) throw new StingException("Illegal null read list in UnifiedReadBackedPileup");
        if ( offset < 0 ) throw new StingException("Illegal offset < 0 UnifiedReadBackedPileup");

        ArrayList<PileupElement> pileup = new ArrayList<PileupElement>(reads.size());
        for ( int i = 0; i < reads.size(); i++ ) {
            pileup.add(new PileupElement(reads.get(i), offset));
        }

        return pileup;
    }

    // --------------------------------------------------------
    //
    // Special 'constructors'
    //
    // --------------------------------------------------------

    /**
     * Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this
     * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
     * of the pileup (just returns this) if there are no deletions in the pileup.
     *
     * @return this pileup if it contains no deletions, otherwise a filtered copy
     */
    @Override
    public ReadBackedPileup getPileupWithoutDeletions() {
        if ( getNumberOfDeletions() > 0 ) {
            // Pre-size to the exact number of surviving elements.
            ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>(size() - getNumberOfDeletions());
            for ( PileupElement p : pileup ) {
                if ( !p.isDeletion() ) {
                    filteredPileup.add(p);
                }
            }
            return new UnifiedReadBackedPileup(loc, filteredPileup);
        } else {
            return this;
        }
    }

    /**
     * Returns a new ReadBackedPileup where only one read from an overlapping read
     * pair is retained. If the two reads in question disagree on their basecall,
     * neither read is retained. If they agree on the base, the read with the higher
     * quality observation is retained.
     *
     * @return the newly filtered pileup
     */
    @Override
    public ReadBackedPileup getOverlappingFragmentFilteredPileup() {
        // Overlapping mates share a read name, so key the survivors by name.
        Map<String, PileupElement> filteredPileup = new HashMap<String, PileupElement>();

        for ( PileupElement p : pileup ) {
            String readName = p.getRead().getReadName();

            // if we've never seen this read before, life is good
            if ( !filteredPileup.containsKey(readName) ) {
                filteredPileup.put(readName, p);
            } else {
                PileupElement existing = filteredPileup.get(readName);

                // if the reads disagree at this position, throw them both out. Otherwise
                // keep the element with the higher quality score
                if ( existing.getBase() != p.getBase() ) {
                    filteredPileup.remove(readName);
                } else {
                    if ( existing.getQual() < p.getQual() ) {
                        filteredPileup.put(readName, p);
                    }
                }
            }
        }

        return new UnifiedReadBackedPileup(loc, new ArrayList<PileupElement>(filteredPileup.values()));
    }

    /**
     * Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
     * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
     * of the pileup (just returns this) if there are no MQ0 reads in the pileup.
     *
     * @return this pileup if it contains no MQ0 reads, otherwise a filtered copy
     */
    @Override
    public ReadBackedPileup getPileupWithoutMappingQualityZeroReads() {
        if ( getNumberOfMappingQualityZeroReads() > 0 ) {
            // Pre-size to the exact number of surviving elements.
            ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>(size() - getNumberOfMappingQualityZeroReads());
            for ( PileupElement p : pileup ) {
                if ( p.getRead().getMappingQuality() > 0 ) {
                    filteredPileup.add(p);
                }
            }
            return new UnifiedReadBackedPileup(loc, filteredPileup);
        } else {
            return this;
        }
    }

    /**
     * Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
     * reads with mapping qualities >= minMapQ. Deletion elements have no base quality and pass on
     * mapping quality alone. This method allocates and returns a new instance of ReadBackedPileup.
     * @param minBaseQ minimum base quality (inclusive)
     * @param minMapQ minimum mapping quality (inclusive)
     * @return a newly allocated filtered pileup
     */
    @Override
    public ReadBackedPileup getBaseAndMappingFilteredPileup(int minBaseQ, int minMapQ) {
        ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();

        for ( PileupElement p : pileup ) {
            if ( p.getRead().getMappingQuality() >= minMapQ && (p.isDeletion() || p.getQual() >= minBaseQ) ) {
                filteredPileup.add(p);
            }
        }

        return new UnifiedReadBackedPileup(loc, filteredPileup);
    }

    /**
     * Returns subset of this pileup that contains only bases with quality >= minBaseQ.
     * This method allocates and returns a new instance of ReadBackedPileup.
     * @param minBaseQ minimum base quality (inclusive)
     * @return a newly allocated filtered pileup
     */
    @Override
    public ReadBackedPileup getBaseFilteredPileup(int minBaseQ) {
        return getBaseAndMappingFilteredPileup(minBaseQ, -1);
    }

    /**
     * Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ.
     * This method allocates and returns a new instance of ReadBackedPileup.
     * @param minMapQ minimum mapping quality (inclusive)
     * @return a newly allocated filtered pileup
     */
    @Override
    public ReadBackedPileup getMappingFilteredPileup(int minMapQ) {
        return getBaseAndMappingFilteredPileup(-1, minMapQ);
    }

    /**
     * Returns a pileup randomly downsampled to the desiredCoverage. If the pileup is already at or
     * below the desired coverage, this pileup is returned unchanged.
     *
     * @param desiredCoverage the target number of elements
     * @return this pileup, or a newly allocated downsampled pileup
     */
    @Override
    public ReadBackedPileup getDownsampledPileup(int desiredCoverage) {
        if ( size() <= desiredCoverage )
            return this;

        // Randomly draw desiredCoverage distinct indices into the pileup; the TreeSet keeps
        // them sorted so the downsampled pileup preserves the original element ordering.
        Random generator = new Random();
        TreeSet<Integer> positions = new TreeSet<Integer>();
        while ( positions.size() < desiredCoverage ) {
            positions.add(generator.nextInt(pileup.size()));
        }

        ArrayList<PileupElement> downsampledPileup = new ArrayList<PileupElement>(desiredCoverage);
        for ( Integer position : positions ) {
            downsampledPileup.add(pileup.get(position));
        }

        return new UnifiedReadBackedPileup(getLocation(), downsampledPileup);
    }

    /**
     * Returns the subset of this pileup belonging to the given sample, as determined by each read's
     * read group. A null sampleName selects elements whose read has no read group or no sample.
     *
     * @param sampleName the sample to select, or null for reads without sample information
     * @return a newly allocated pileup for the sample, or null if no elements match
     */
    @Override
    public ReadBackedPileup getPileupForSample(String sampleName) {
        List<PileupElement> filteredPileup = new ArrayList<PileupElement>();
        for ( PileupElement p : this ) {
            SAMRecord read = p.getRead();
            if ( sampleName != null ) {
                if ( read.getReadGroup() != null && sampleName.equals(read.getReadGroup().getSample()) )
                    filteredPileup.add(p);
            }
            else {
                if ( read.getReadGroup() == null || read.getReadGroup().getSample() == null )
                    filteredPileup.add(p);
            }
        }
        // NOTE: returns null (not an empty pileup) when no elements match; callers depend on this.
        return filteredPileup.size() > 0 ? new UnifiedReadBackedPileup(loc, filteredPileup) : null;
    }

    // --------------------------------------------------------
    //
    // iterators
    //
    // --------------------------------------------------------

    /**
     * The best way to access PileupElements where you only care about the bases and quals in the pileup.
     *
     * for (PileupElement p : this) { doSomething(p); }
     *
     * Provides efficient iteration of the data.
     *
     * @return an iterator over the underlying element list
     */
    @Override
    public Iterator<PileupElement> iterator() {
        return pileup.iterator();
    }

    /**
     * The way to access PileupElements when you need not only the bases and quals in the pileup
     * but also the index of each pileup element in the pile.
     *
     * for (ExtendedPileupElement p : this.extendedForeachIterator()) { doSomething(p); }
     *
     * NOTE: materializes a full list of wrappers up front, so prefer iterator() when the
     * index is not needed.
     *
     * @return an iterable over indexed wrappers of the pileup elements
     */
    // todo -- reimplement efficiently
    // todo -- why is this public?
    @Override
    public IterableIterator<ExtendedPileupElement> extendedForeachIterator() {
        ArrayList<ExtendedPileupElement> x = new ArrayList<ExtendedPileupElement>(size());
        int i = 0;
        for ( PileupElement pile : this ) {
            x.add(new ExtendedPileupElement(pile.getRead(), pile.getOffset(), i++, this));
        }

        return new IterableIterator<ExtendedPileupElement>(x.iterator());
    }

    /**
     * Simple useful routine to count the number of deletion bases in this pileup
     *
     * @return the cached number of deletion-spanning elements
     */
    @Override
    public int getNumberOfDeletions() {
        return nDeletions;
    }

    /**
     * @return the cached number of reads with mapping quality zero
     */
    @Override
    public int getNumberOfMappingQualityZeroReads() {
        return nMQ0Reads;
    }

    /**
     * @return the number of elements in this pileup
     */
    @Override
    public int size() {
        return size;
    }

    /**
     * @return the location of this pileup
     */
    @Override
    public GenomeLoc getLocation() {
        return loc;
    }

    /**
     * Get counts of A, C, G, T in order, which returns a int[4] vector with counts according
     * to BaseUtils.simpleBaseToBaseIndex for each base. Deletions and non-regular bases are skipped.
     *
     * @return a freshly allocated int[4] of base counts
     */
    @Override
    public int[] getBaseCounts() {
        int[] counts = new int[4];

        for ( PileupElement pile : this ) {
            // skip deletion sites
            if ( ! pile.isDeletion() ) {
                int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase());
                if ( index != -1 )
                    counts[index]++;
            }
        }

        return counts;
    }

    /**
     * Somewhat expensive routine that returns true if any base in the pileup has secondary bases annotated
     * @return true if at least one non-deletion element carries a regular secondary base
     */
    @Override
    public boolean hasSecondaryBases() {
        for ( PileupElement pile : this ) {
            // skip deletion sites
            if ( ! pile.isDeletion() && BaseUtils.isRegularBase((char)pile.getSecondBase()) )
                return true;
        }

        return false;
    }

    /**
     * Format this pileup as a samtools-style pileup line.
     *
     * @param ref the reference base at this locus
     * @return the formatted pileup line
     */
    @Override
    public String getPileupString(char ref) {
        // In the pileup format, each line represents a genomic position, consisting of chromosome name,
        // coordinate, reference base, read bases, read qualities and alignment mapping qualities.
        return String.format("%s %s %c %s %s",
                getLocation().getContig(), getLocation().getStart(),    // chromosome name and coordinate
                ref,                                                     // reference base
                new String(getBases()),
                getQualsString());
    }

    // --------------------------------------------------------
    //
    // Convenience functions that may be slow
    //
    // --------------------------------------------------------

    /**
     * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
     * @return a freshly allocated list of the reads, in pileup order
     */
    @Override
    public List<SAMRecord> getReads() {
        List<SAMRecord> reads = new ArrayList<SAMRecord>(size());
        for ( PileupElement pile : this ) { reads.add(pile.getRead()); }
        return reads;
    }

    /**
     * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
     * @return a freshly allocated list of the offsets, in pileup order
     */
    @Override
    public List<Integer> getOffsets() {
        List<Integer> offsets = new ArrayList<Integer>(size());
        for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); }
        return offsets;
    }

    /**
     * Returns an array of the bases in this pileup. Note this call costs O(n) and allocates a fresh array each time
     * @return a freshly allocated array of bases, in pileup order
     */
    @Override
    public byte[] getBases() {
        byte[] v = new byte[size()];
        int i = 0;
        for ( PileupElement pile : this ) { v[i++] = pile.getBase(); }
        return v;
    }

    /**
     * Returns an array of the secondary bases in this pileup. Note this call costs O(n) and allocates a fresh array each time
     * @return a freshly allocated array of secondary bases, in pileup order
     */
    @Override
    public byte[] getSecondaryBases() {
        byte[] v = new byte[size()];
        int i = 0;
        for ( PileupElement pile : this ) { v[i++] = pile.getSecondBase(); }
        return v;
    }

    /**
     * Returns an array of the quals in this pileup. Note this call costs O(n) and allocates a fresh array each time
     * @return a freshly allocated array of base qualities, in pileup order
     */
    @Override
    public byte[] getQuals() {
        byte[] v = new byte[size()];
        int i = 0;
        for ( PileupElement pile : this ) { v[i++] = pile.getQual(); }
        return v;
    }

    /**
     * Get an array of the mapping qualities
     * @return a freshly allocated array of mapping qualities, in pileup order
     */
    @Override
    public byte[] getMappingQuals() {
        byte[] v = new byte[size()];
        int i = 0;
        for ( PileupElement pile : this ) { v[i++] = (byte)pile.getRead().getMappingQuality(); }
        return v;
    }

    //
    // Private functions for printing pileups
    //
    private String getMappingQualsString() {
        return quals2String(getMappingQuals());
    }

    /**
     * Encode quality scores as a FASTQ-style (Phred+33) string.
     * @param quals the quality values to encode
     * @return the encoded string
     */
    static String quals2String(byte[] quals) {
        StringBuilder qualStr = new StringBuilder();
        for ( int qual : quals ) {
            qual = Math.min(qual, 63);          // todo: fixme, this isn't a good idea
            char qualChar = (char)(33 + qual);  // todo: warning, this is illegal for qual > 63
            qualStr.append(qualChar);
        }

        return qualStr.toString();
    }

    private String getQualsString() {
        return quals2String(getQuals());
    }

    /**
     * @return a human-readable "loc: bases" rendering of this pileup
     */
    @Override
    public String toString() {
        StringBuilder s = new StringBuilder();
        s.append(getLocation());
        s.append(": ");
        for ( PileupElement p : this ) {
            s.append((char)p.getBase());
        }
        return s.toString();
    }
}